Storm 有个经典的例子 WordCount,它几乎可以说是大数据领域的经典入门例子,MapReduce 也有同样的例子。但是 Storm 自带示例包里的 WordCountTopology 通过 ShellBolt 调用了 Python 脚本,直接在 Eclipse 里运行会报错,所以这里做了个小改动:用纯 Java 实现的 SplitSentence 替换了原来调用 Python 的分词 Bolt。
1、WordCountTopology.java
package storm.starter;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.task.ShellBolt;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.FailedException;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import storm.starter.spout.RandomSentenceSpout;
import java.util.HashMap;
import java.util.Map;
/**
 * Word-count topology demonstrating Storm's stream groupings.
 *
 * <p>A {@link RandomSentenceSpout} emits sentences, {@link SplitSentence} splits each sentence
 * into words, and {@link WordCount} keeps a running count per word. The sentence splitter is a
 * plain Java bolt instead of a Python multilang {@code ShellBolt}, so the topology can be run
 * in local mode (for example from an IDE) without a Python dependency.
 */
public class WordCountTopology {

    // Original multilang (Python) splitter, kept for reference only:
    //
    // public static class SplitSentence extends ShellBolt implements IRichBolt {
    //
    // public SplitSentence() {
    // super("python", "splitsentence.py");
    // }
    //
    // @Override
    // public void declareOutputFields(OutputFieldsDeclarer declarer) {
    // declarer.declare(new Fields("word"));
    // }
    //
    // @Override
    // public Map<String, Object> getComponentConfiguration() {
    // return null;
    // }
    // }

    /**
     * Bolt that splits the string in the first field of each incoming tuple and emits one
     * single-field tuple ("word") per resulting token.
     */
    public static class SplitSentence extends BaseBasicBolt {
        /** Regular expression handed to {@link String#split(String)} to separate the words. */
        String patton;

        /**
         * @param patton regular expression used as the word delimiter (e.g. {@code " "})
         */
        public SplitSentence(String patton) {
            this.patton = patton;
        }

        /** Declares the single output field {@code "word"}. */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word"));
        }

        /**
         * Splits the sentence at position 0 of the tuple and emits every token.
         * A {@code null} sentence is silently ignored.
         *
         * @throws FailedException if reading, splitting or emitting fails; the original
         *     exception is attached as the cause
         */
        @Override
        public void execute(Tuple tuple, BasicOutputCollector collector) {
            try {
                String sen = tuple.getString(0);
                if (sen != null) {
                    for (String word : sen.split(patton)) {
                        collector.emit(new Values(word));
                    }
                }
            } catch (Exception e) {
                // Preserve the root cause; previously it was dropped, hiding the real failure.
                throw new FailedException("split fail!", e);
            }
        }
    }

    /**
     * Bolt that keeps an in-memory running count per word (read from the {@code "word"} field)
     * and prints the updated count to standard error. It emits nothing downstream.
     */
    public static class WordCount extends BaseBasicBolt {
        /** Running count per word seen by this bolt instance. */
        Map<String, Integer> counts = new HashMap<String, Integer>();

        /** Increments the count of the incoming word and prints the new total. */
        @Override
        public void execute(Tuple tuple, BasicOutputCollector collector) {
            String word = tuple.getStringByField("word");
            Integer count = counts.get(word);
            if (count == null) {
                count = 0;
            }
            count++;
            counts.put(word, count);
            // To forward the counts downstream instead of printing them, emit
            // new Values(word, count) here and declare the matching output fields below.
            System.err.println("word="+word+"; word_count="+count);
        }

        /** Declares no output fields because this bolt does not emit tuples. */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            // Intentionally empty; would be new Fields("word", "count") if execute() emitted.
        }
    }

    /**
     * Builds the spout -> split -> count topology and runs it.
     *
     * <p>With at least one argument, the topology is submitted through {@link StormSubmitter}
     * under the name {@code args[0]}. Without arguments it runs in a {@link LocalCluster}
     * for five seconds and then shuts the cluster down.
     *
     * @param args optional; {@code args[0]} is the topology name for cluster submission
     * @throws Exception if submission fails or the local run is interrupted
     */
    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new RandomSentenceSpout(), 1);
        builder.setBolt("split", new SplitSentence(" "), 1)
                .shuffleGrouping("spout");
        // Fields grouping on "word" routes every occurrence of a word to the same counter task.
        builder.setBolt("count", new WordCount(), 1)
                .fieldsGrouping("split", new Fields("word"));

        Config conf = new Config();
        conf.setDebug(false);

        if (args != null && args.length > 0) {
            conf.setNumWorkers(3);
            StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
        } else {
            conf.setMaxTaskParallelism(3);
            LocalCluster cluster = new LocalCluster();
            try {
                cluster.submitTopology("word-count", conf, builder.createTopology());
                Thread.sleep(5000);
            } finally {
                // Always stop the in-process cluster, even if submission or the sleep fails.
                cluster.shutdown();
            }
        }
    }
}
2、spout的实现类RandomSentenceSpout.java
package storm.starter.spout;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import java.util.Map;
import java.util.Random;
/**
 * Spout that emits one randomly chosen sentence from a fixed list on each
 * {@link #nextTuple()} call and logs every emission to standard error.
 */
public class RandomSentenceSpout extends BaseRichSpout {
    /** Fixed pool of sentences to pick from; built once instead of on every nextTuple() call. */
    private static final String[] SENTENCES = new String[]{
        "the cow jumped over the moon",
        "an apple a day keeps the doctor away",
        "four score and seven years ago",
        "snow white and the seven dwarfs",
        "i am at two with nature"
    };

    SpoutOutputCollector _collector;
    Random _rand;
    /** Number of sentences emitted so far by this spout instance. */
    int _num;

    /** Stores the collector and creates the random generator used to pick sentences. */
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        _collector = collector;
        _rand = new Random();
    }

    /**
     * Pauses via {@code Utils.sleep(100)}, picks a random sentence, logs it together with the
     * running emission counter, and emits it as a single-field tuple.
     */
    @Override
    public void nextTuple() {
        Utils.sleep(100);
        int num = _rand.nextInt(SENTENCES.length);
        String sentence = SENTENCES[num];
        _num++;
        System.err.println("Spout__batch_num:"+_num+"___Random number is :"+num+" Emit Sentence is :"+sentence);
        _collector.emit(new Values(sentence));
    }

    /** No-op: tuples are emitted without a message id. */
    @Override
    public void ack(Object id) {
    }

    /** No-op: tuples are emitted without a message id. */
    @Override
    public void fail(Object id) {
    }

    /**
     * Declares the single output field. Note that the field is named {@code "word"} even
     * though each tuple carries a whole sentence.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));
    }
}
3、Bolt 的实现类(SplitSentence 和 WordCount)以静态内部类的形式嵌套在拓扑类 WordCountTopology 中。