-- fox.conf
# Flume agent "agent": one source, one channel, one sink
# Data flow: fox (source) -> zoo (channel) -> koala (sink)
agent.sources = fox
agent.channels = zoo
agent.sinks = koala

# Source fox: spooling-directory source that ingests files placed in /home/flume/dump
agent.sources.fox.type = spooldir
agent.sources.fox.spoolDir = /home/flume/dump
agent.sources.fox.channels = zoo

# Channel zoo: file channel (events are buffered on disk, not in memory)
agent.channels.zoo.type = file

# Sink koala: writes events to HDFS under /flume/events
agent.sinks.koala.type = hdfs
agent.sinks.koala.channel = zoo
agent.sinks.koala.hdfs.path = /flume/events
# DataStream + Text: plain, uncompressed text output
agent.sinks.koala.hdfs.fileType = DataStream
agent.sinks.koala.hdfs.writeFormat = Text
# rollSize = 0 turns off size-based rolling; roll after every 10000 events instead.
# hdfs.rollInterval is not set here, so its default still applies.
agent.sinks.koala.hdfs.rollSize = 0
agent.sinks.koala.hdfs.rollCount = 10000
-- start the Flume agent with the configuration (fox.conf)
shell$ flume-ng agent --conf conf --conf-file fox.conf --name agent
2. hcatalog
# Use the HCatalog CLI to create table koala with a bigint count column (cnt) and a string word column (wd).
hcat -e "create table koala (cnt bigint, wd string)"
3. pig
-- Word count over the event files under /flume/events.
-- No loader is given, so Pig's default PigStorage (tab-delimited) applies
-- and $0 is the first tab-separated field of each line.
lines = load '/flume/events/*';
-- Break each line into tokens and emit one row per token.
words = foreach lines generate flatten(TOKENIZE((chararray)$0)) as word;
by_word = group words by word;
-- Output fields are named cnt and wd, in that order.
counts = foreach by_word generate COUNT(words) as cnt, group as wd;
-- Persist the result into table koala through HCatalog.
store counts into 'koala' using org.apache.hcatalog.pig.HCatStorer();
4. hive
-- Ten most frequent words, highest count first.
SELECT wd, cnt
FROM koala
ORDER BY cnt DESC
LIMIT 10;
0 개의 댓글:
댓글 쓰기