Hadoop 2.7 in Practice v1.0: Flume 1.6.0 Setup (HTTP Source --> Memory Channel --> HDFS Sink)
1. Check whether JDK 1.7.0 is already configured on the system
[root@xxx-01 jdk1.7.0_25]# bin/java -version
java version "1.7.0_25"
Java(TM) SE Runtime Environment (build 1.7.0_25-b15)
Java HotSpot(TM) 64-Bit Server VM (build 23.25-b01, mixed mode)
[root@xxx-01 jdk1.7.0_25]# pwd
/usr/java/jdk1.7.0_25
[root@xxx-01 jdk1.7.0_25]#
		
	
## If a JDK at version 1.7 or later is not yet configured, see the "Install JDK" section of http://blog.itpub.net/30089851/viewspace-1994585/
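For quick reference, a minimal sketch of the /etc/profile entries for the JDK, assuming it was unpacked to /usr/java/jdk1.7.0_25 as shown above (see the linked post for the full installation steps):

export JAVA_HOME=/usr/java/jdk1.7.0_25
export PATH=$JAVA_HOME/bin:$PATH

After source /etc/profile, java -version should report 1.7.0_25 as in the transcript above.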
 
	
2. Download and extract Flume 1.6.0
[root@xxx-01 local]# wget http://ftp.cuhk.edu.hk/pub/packages/apache.org/flume/1.6.0/apache-flume-1.6.0-bin.tar.gz
[root@xxx-01 local]# tar zxvf apache-flume-1.6.0-bin.tar.gz
[root@xxx-01 local]# cd apache-flume-1.6.0-bin
[root@xxx-01 apache-flume-1.6.0-bin]# ll
total 140
drwxr-xr-x  2 template games  4096 May 21 17:32 bin
-rw-r--r--  1 template games 69856 May  9  2015 CHANGELOG
drwxr-xr-x  2 template games  4096 May 21 17:32 conf
-rw-r--r--  1 template games  6172 May  9  2015 DEVNOTES
drwxr-xr-x 10 template games  4096 May 12  2015 docs
drwxr-xr-x  2 root     root   4096 May 21 17:32 lib
-rw-r--r--  1 template games 25903 May  9  2015 LICENSE
-rw-r--r--  1 template games   249 May  9  2015 NOTICE
-rw-r--r--  1 template games  1779 May  9  2015 README
-rw-r--r--  1 template games  1585 May  9  2015 RELEASE-NOTES
drwxr-xr-x  2 root     root   4096 May 21 17:32 tools
[root@xxx-01 apache-flume-1.6.0-bin]#
[root@xxx-01 apache-flume-1.6.0-bin]# cd conf
[root@xxx-01 conf]# ls -l
total 16
-rw-r--r-- 1 template games 1661 May  9  2015 flume-conf.properties.template
-rw-r--r-- 1 template games 1110 May  9  2015 flume-env.ps1.template
-rw-r--r-- 1 template games 1214 May  9  2015 flume-env.sh.template
-rw-r--r-- 1 template games 3107 May  9  2015 log4j.properties
[root@xxx-01 conf]#
[root@xxx-01 conf]# cp flume-env.sh.template  flume-env.sh
[root@xxx-01 conf]# cp flume-conf.properties.template  flume-conf.properties
[root@xxx-01 conf]#
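If the CUHK mirror is slow or unavailable, the same 1.6.0 tarball should also be downloadable from the Apache archive (URL assumed from the standard Apache archive layout):

wget http://archive.apache.org/dist/flume/1.6.0/apache-flume-1.6.0-bin.tar.gz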
3. Configure flume-env.sh and the environment variables
[root@xxx-01 conf]# vi flume-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_25
export HADOOP_HOME=/opt/cloudera/parcels/CDH/lib/hadoop

[root@xxx-01 ~]# vi /etc/profile
export JAVA_HOME="/usr/java/jdk1.7.0_25"
export FLUME_HOME=/usr/local/apache-flume-1.6.0-bin
export FLUME_CONF_DIR=$FLUME_HOME/conf

export PATH=$FLUME_HOME/bin:$JAVA_HOME/bin:$PATH

[root@xxx-01 ~]# source /etc/profile
[root@xxx-01 ~]# echo $FLUME_HOME
/usr/local/apache-flume-1.6.0-bin
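To confirm the environment is picked up, the flume-ng script can report its version; the output should start with "Flume 1.6.0" followed by source/revision details:

[root@xxx-01 ~]# flume-ng version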
4. Configure flume-conf.properties
[root@xxx-01 conf]# vi flume-conf.properties

# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
### The default HTTP handler expects JSON-formatted events
a1.sources.r1.type = http
a1.sources.r1.bind = <IP of this machine>
a1.sources.r1.port = 5140

a1.sources.r1.fileHeader = false
#a1.sources.r1.deserializer.outputCharset=UTF-8

### The timestamp interceptor adds a timestamp header to each event,
### which the HDFS sink needs to resolve the %Y-%m-%d/%H escapes in hdfs.path
a1.sources.r1.interceptors = i1
a1.sources.r1.interceptors.i1.type = timestamp

# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
# hdfs.path may point at the HDFS HA nameservice (the fs.defaultFS value) instead of a single master host.
# For this to work, the Flume machine must have a Hadoop environment (the Hadoop jars have to be loadable),
# and its hadoop-env.sh, hdfs-site.xml and core-site.xml must match the cluster that stores the logs
# (see the sketch after this configuration).
a1.sinks.k1.hdfs.path = hdfs://nameservice1/testwjp/%Y-%m-%d/%H
#a1.sinks.k1.hdfs.path = hdfs://xxx-01:8022/testwjp/%Y-%m-%d/%H
a1.sinks.k1.hdfs.filePrefix = logs
a1.sinks.k1.hdfs.inUsePrefix = .

a1.sinks.k1.hdfs.rollInterval = 0
### roll at 16 MB = 16777216 bytes
a1.sinks.k1.hdfs.rollSize = 16777216
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.batchSize = 1000
a1.sinks.k1.hdfs.writeFormat = text

### A. plain text format
a1.sinks.k1.hdfs.fileType = DataStream

### B. compressed format
#a1.sinks.k1.hdfs.fileType = CompressedStream
#a1.sinks.k1.hdfs.codeC = bzip2

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.keep-alive = 30
a1.channels.c1.capacity = 100000
a1.channels.c1.transactionCapacity = 1000

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
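For the HA path hdfs://nameservice1/... to resolve, the Flume host needs the same HDFS client configuration as the cluster, as noted in the comment above. A minimal sketch of one common approach, assuming the client configs live under /etc/hadoop/conf as on a typical CDH node (paths are assumptions; adjust to your layout):

# copy the cluster's client configs next to the Flume configuration,
# or rely on HADOOP_HOME from step 3 pointing at a matching client install
cp /etc/hadoop/conf/core-site.xml /etc/hadoop/conf/hdfs-site.xml $FLUME_HOME/conf/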
5. Start the agent
# -f points at the properties file edited in step 4
[root@xxx-01 apache-flume-1.6.0-bin]# bin/flume-ng agent -c conf -f conf/flume-conf.properties -n a1 -Dflume.root.logger=INFO,console

### Start in the background
nohup bin/flume-ng agent -c conf -f conf/flume-conf.properties -n a1 -Dflume.root.logger=INFO,console &
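When started with nohup as above, the console logger output goes to nohup.out; tailing it is a quick way to confirm that the HTTP source has bound to port 5140 and the HDFS sink has started (the exact log lines may vary):

tail -f nohup.out
# optionally confirm the port is listening
netstat -anp | grep 5140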
		
			
6. Test client A: curl from the command line
[root@xxx-01 bin]# curl -X POST -d '[{"headers":{"h1":"v1","h2":"v2"},"body":"hello body"}]' http://10.168.11.13:5140
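To also see the HTTP status code the source returns (200 on success, as noted in step 7), the same request can be sent with curl -i; the JSON Content-Type header is optional for the default handler but makes the intent explicit:

curl -i -X POST -H "Content-Type: application/json" -d '[{"headers":{"h1":"v1","h2":"v2"},"body":"hello body"}]' http://10.168.11.13:5140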
		
			
7. Test client B: Firefox + the HttpRequester add-on
URL: http://<IP of this machine>:5140
Type: POST
Content Type: application/json
Content:
		
[{
  "headers" : {
    "timestamp" : "1",
    "host" : "random_host1.example.com"
  },
  "body" : "random_body1"
},
{
  "headers" : {
    "timestamp" : "2",
    "host" : "random_host2.example.com"
  },
  "body" : "random_body2"
}]
### Click "Submit"; a response status of 200 indicates the POST succeeded.
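The same two-event payload can also be posted from the command line; a sketch with curl, assuming the JSON above is saved to a file named events.json (the file name is arbitrary):

curl -X POST -H "Content-Type: application/json" -d @events.json http://10.168.11.13:5140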
  
 
 
8. Verify that the data was sunk to HDFS
Command line: hadoop fs -ls hdfs://nameservice1/testwjp/
Web UI: http://x.x.x.x:50070/
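To drill down to the actual events, list the date/hour directory created by the %Y-%m-%d/%H escapes and cat a rolled file; the 2016-05-21/17 path below is only an example, and files still being written are hidden by the "." inUsePrefix until they roll:

hadoop fs -ls hdfs://nameservice1/testwjp/2016-05-21/17/
hadoop fs -cat hdfs://nameservice1/testwjp/2016-05-21/17/logs.*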
Official documentation: http://flume.apache.org/FlumeUserGuide.html