一、搭建
1、解壓
tar -zxf /opt/software/apache-flume-1.9.0-bin.tar.gz -C /opt/module/
mv /opt/module/apache-flume-1.9.0-bin /opt/module/flume
2、刪除不兼容包
rm /opt/module/flume/lib/guava-11.0.2.jar
3、測試
1、安裝nc
yum install -y nc
2、創建agent配置文件
mkdir -p /opt/module/flume/job && cd /opt/module/flume/job
vim flume-netcat-logger.conf
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
3、啟動
vi flume.sh
/opt/module/flume/bin/flume-ng agent --conf /opt/module/flume/conf/ --name a1 --conf-file /opt/module/flume/job/flume-netcat-logger.conf -Dflume.root.logger=INFO,console
sh flume.sh
參數說明:
--conf/-c:表示配置文件存儲在conf/目錄
--name/-n:表示給agent起名為a1
--conf-file/-f:flume本次啟動讀取的配置文件是在job文件夾下的flume-netcat-logger.conf文件。
-Dflume.root.logger=INFO,console :-D表示flume運行時動態修改flume.root.logger參數屬性值,并將控制臺日志打印級別設置為INFO級別。日志級別包括:debug、info、warn、error。
4、啟動nc輸入數據
nc localhost 44444
5、檢查數據
二、使用
1、Flume數據輸出到HDFS
1、flume文本
vim flume-file-hdfs.conf
# Name the components on this agent
a2.sources = r2
a2.sinks = k2
a2.channels = c2
# Describe/configure the source
a2.sources.r2.type = exec
a2.sources.r2.command = tail -F /opt/module/hive/logs/hive.log
# Describe the sink
a2.sinks.k2.type = hdfs
a2.sinks.k2.hdfs.path = hdfs://bigdata:9820/flume/%Y%m%d/%H
#上傳文件的前綴
a2.sinks.k2.hdfs.filePrefix = logs-
#是否按照時間滾動文件夾
a2.sinks.k2.hdfs.round = true
#多少時間單位創建一個新的文件夾
a2.sinks.k2.hdfs.roundValue = 1
#重新定義時間單位
a2.sinks.k2.hdfs.roundUnit = hour
#是否使用本地時間戳
a2.sinks.k2.hdfs.useLocalTimeStamp = true
#積攢多少個Event才flush到HDFS一次
a2.sinks.k2.hdfs.batchSize = 100
#設置文件類型,可支持壓縮
a2.sinks.k2.hdfs.fileType = DataStream
#多久生成一個新的文件
a2.sinks.k2.hdfs.rollInterval = 60
#設置每個文件的滾動大小
a2.sinks.k2.hdfs.rollSize = 134217700
#文件的滾動與Event數量無關
a2.sinks.k2.hdfs.rollCount = 0
# Use a channel which buffers events in memory
a2.channels.c2.type = memory
a2.channels.c2.capacity = 1000
a2.channels.c2.transactionCapacity = 100
# Bind the source and sink to the channel
a2.sources.r2.channels = c2
a2.sinks.k2.channel = c2
2、啟動腳本
vim flume-file-hdfs.sh
/opt/module/flume/bin/flume-ng agent --conf /opt/module/flume/conf/ --name a2 --conf-file /opt/module/flume/job/flume-file-hdfs.conf -Dflume.root.logger=INFO,console
sh flume-file-hdfs.sh
3、hdfs頁面驗證數據
2、Flume監聽整個目錄的實時追加文件,并上傳至HDFS
1、flume文本
vim flume-taildir-hdfs.conf
a3.sources = r3
a3.sinks = k3
a3.channels = c3
# Describe/configure the source
a3.sources.r3.type = TAILDIR
a3.sources.r3.positionFile = /opt/module/flume/tail_dir.json
a3.sources.r3.filegroups = f1 f2
a3.sources.r3.filegroups.f1 = /opt/module/flume/files/.*file.*
a3.sources.r3.filegroups.f2 = /opt/module/flume/files2/.*log.*
# Describe the sink
a3.sinks.k3.type = hdfs
a3.sinks.k3.hdfs.path = hdfs://bigdata:9820/flume/upload2/%Y%m%d/%H
#上傳文件的前綴
a3.sinks.k3.hdfs.filePrefix = upload-
#是否按照時間滾動文件夾
a3.sinks.k3.hdfs.round = true
#多少時間單位創建一個新的文件夾
a3.sinks.k3.hdfs.roundValue = 1
#重新定義時間單位
a3.sinks.k3.hdfs.roundUnit = hour
#是否使用本地時間戳
a3.sinks.k3.hdfs.useLocalTimeStamp = true
#積攢多少個Event才flush到HDFS一次
a3.sinks.k3.hdfs.batchSize = 100
#設置文件類型,可支持壓縮
a3.sinks.k3.hdfs.fileType = DataStream
#多久生成一個新的文件
a3.sinks.k3.hdfs.rollInterval = 60
#設置每個文件的滾動大小,大概是128M
a3.sinks.k3.hdfs.rollSize = 134217700
#文件的滾動與Event數量無關
a3.sinks.k3.hdfs.rollCount = 0
# Use a channel which buffers events in memory
a3.channels.c3.type = memory
a3.channels.c3.capacity = 1000
a3.channels.c3.transactionCapacity = 100
# Bind the source and sink to the channel
a3.sources.r3.channels = c3
a3.sinks.k3.channel = c3
2、啟動腳本
vi flume-taildir-hdfs.sh
/opt/module/flume/bin/flume-ng agent --conf /opt/module/flume/conf/ --name a3 --conf-file /opt/module/flume/job/flume-taildir-hdfs.conf -Dflume.root.logger=INFO,console
sh flume-taildir-hdfs.sh
3、測試驗證數據
在/opt/module/flume目錄下創建files和files2文件夾
在files下
echo hello >> file1.txt
echo hello >> file2.txt
在files2下
echo hello >> log1.txt
echo hello >> log2.txt