SeaTunnel是什么
SeaTunnel是一个简单易用、高性能、能够应对海量数据的数据处理产品。
2021年12月9日,SeaTunnel正式通过Apache软件基金会的投票决议,以全票通过的优秀表现正式成为Apache孵化器项目。
SeaTunnel的应用场景
SeaTunnel适用于以下场景:
海量数据的同步
海量数据的集成
海量数据的ETL
海量数据的聚合
多源数据处理
SeaTunnel的特点:
基于配置的低代码开发,易用性高,方便维护。
支持实时流式传输
离线多源数据分析
高性能、海量数据处理能力
模块化的插件架构,易于扩展
支持用SQL进行数据操作和数据聚合
支持Spark Structured Streaming
支持Spark 2.x
目前SeaTunnel的长板是它有丰富的连接器,又因为它以Spark和Flink为引擎,
所以可以很好地进行分布式的海量数据同步。
通常SeaTunnel会被用来做出仓入仓工具,或者被用来进行数据集成。
[root@almalinux ~]# ll -d */
drwxr-xr-x. 10 root root 156 2月 4 2022 flink-1.13.6/
drwxr-xr-x. 8 root root 155 2月 17 2022 seatunnel2.1/
drwxr-xr-x. 15 root root 235 6月 5 03:15 spark-3.0.3-hadoop3.2/
[root@almalinux ~]# flink-1.13.6/bin/start-cluster.sh
Starting cluster.
Starting standalonesession daemon on host almalinux.test.com.
Starting taskexecutor daemon on host almalinux.test.com.
[root@almalinux ~]# netstat -nltp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 813/sshd
tcp6 0 0 :::22 :::* LISTEN 813/sshd
tcp6 0 0 :::34143 :::* LISTEN 5705/java
tcp6 0 0 :::33575 :::* LISTEN 5426/java
tcp6 0 0 :::35591 :::* LISTEN 5426/java
tcp6 0 0 :::6123 :::* LISTEN 5426/java
tcp6 0 0 :::40333 :::* LISTEN 5705/java
tcp6 0 0 :::45039 :::* LISTEN 5705/java
tcp6 0 0 :::8081 :::* LISTEN 5426/java
[root@almalinux ~]# curl 127.0.0.1:8081
[root@almalinux ~]# ll -d */
drwxr-xr-x. 10 root root 156 2月 4 2022 flink-1.13.6/
drwxr-xr-x. 8 root root 155 2月 17 2022 seatunnel2.1/
drwxr-xr-x. 15 root root 235 6月 5 03:15 spark-3.0.3-hadoop3.2/
[root@almalinux ~]# cd seatunnel2.1/
[root@almalinux seatunnel2.1]# pwd
/root/seatunnel2.1
[root@almalinux seatunnel2.1]# ./bin/start-seatunnel-flink.sh --config ./config/flink.streaming.conf.template
Export JVM_ARGS: -Dexecution.parallelism=1
Execute SeaTunnel Flink Job: ${FLINK_HOME}/bin/flink run -c org.apache.seatunnel.core.flink.SeatunnelFlink /root/seatunnel2.1/lib/seatunnel-core-flink.jar --config ./config/flink.streaming.conf.template
Job has been submitted with JobID 8c199e8cc72ce021ff2cf4f8fb7bd2be
[root@almalinux seatunnel2.1]#
[root@almalinux seatunnel2.1]#
[root@almalinux seatunnel2.1]#
[root@almalinux seatunnel2.1]# cd ..
[root@almalinux ~]# ll -d */
drwxr-xr-x. 10 root root 156 2月 4 2022 flink-1.13.6/
drwxr-xr-x. 8 root root 155 2月 17 2022 seatunnel2.1/
drwxr-xr-x. 15 root root 235 6月 5 03:15 spark-3.0.3-hadoop3.2/
[root@almalinux ~]# cd spark-3.0.3-hadoop3.2/
[root@almalinux spark-3.0.3-hadoop3.2]# ./sbin/start-all.sh
starting org.apache.spark.deploy.master.Master, logging to /root/spark-3.0.3-hadoop3.2/logs/spark-root-org.apache.spark.deploy.master.Master-1-almalinux.test.com.out
root@localhost's password:
localhost: starting org.apache.spark.deploy.worker.Worker, logging to /root/spark-3.0.3-hadoop3.2/logs/spark-root-org.apache.spark.deploy.worker.Worker-1-almalinux.test.com.out
[root@almalinux spark-3.0.3-hadoop3.2]# netstat -nltp
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN 813/sshd
tcp6 0 0 :::22 :::* LISTEN 813/sshd
tcp6 0 0 192.168.100.108:40505 :::* LISTEN 6163/java
tcp6 0 0 :::34143 :::* LISTEN 5705/java
tcp6 0 0 127.0.0.1:7077 :::* LISTEN 6058/java
tcp6 0 0 :::33575 :::* LISTEN 5426/java
tcp6 0 0 :::35591 :::* LISTEN 5426/java
tcp6 0 0 :::6123 :::* LISTEN 5426/java
tcp6 0 0 :::40333 :::* LISTEN 5705/java
tcp6 0 0 :::45039 :::* LISTEN 5705/java
tcp6 0 0 :::8080 :::* LISTEN 6058/java
tcp6 0 0 :::8081 :::* LISTEN 5426/java
tcp6 0 0 :::8082 :::* LISTEN 6163/java
[root@almalinux ~]# cd seatunnel2.1/config/
[root@almalinux config]# pwd
/root/seatunnel2.1/config
[root@almalinux config]# more seatunnel-env.sh
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Home directory of spark distribution.
#SPARK_HOME=${SPARK_HOME:-/opt/spark}
SPARK_HOME=/root/spark-3.0.3-hadoop3.2
# Home directory of flink distribution.
#FLINK_HOME=${FLINK_HOME:-/opt/flink}
FLINK_HOME=/root/flink-1.13.6/
# Control whether to print the ascii logo
export SEATUNNEL_PRINT_ASCII_LOGO=true