示例代码:
package com.lky.topology; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.LocalDRPC; import backtype.storm.coordination.BatchOutputCollector; import backtype.storm.drpc.LinearDRPCTopologyBuilder; import backtype.storm.task.TopologyContext; import backtype.storm.topology.BasicOutputCollector; import backtype.storm.topology.OutputFieldsDeclarer; import backtype.storm.topology.base.BaseBasicBolt; import backtype.storm.topology.base.BaseBatchBolt; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; import backtype.storm.utils.Utils; import java.util.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * @Title: ReachTopology.java * @Package com.lky.topology * @Description: 计算一个包含特定url的微博,最终能被多少人看到 * @author lky * @date 2015年10月23日 下午9:09:22 * @version V1.0 */ public class ReachTopology { private static Log log = LogFactory.getLog(ReachTopology.class); public static Map<String, List<String>> TWEETERS_DB = new HashMap<String, List<String>>() { { put("foo.com/blog/1", Arrays.asList("sally", "bob", "tim", "george", "nathan")); put("engineering.twitter.com/blog/5", Arrays.asList("adam", "david", "sally", "nathan")); put("tech.backtype.com/blog/123", Arrays.asList("tim", "mike", "john")); } }; public static Map<String, List<String>> FOLLOWERS_DB = new HashMap<String, List<String>>() { { put("sally", Arrays.asList("bob", "tim", "alice", "adam", "jim", "chris", "jai")); put("bob", Arrays.asList("sally", "nathan", "jim", "mary", "david", "vivian")); put("tim", Arrays.asList("alex")); put("nathan", Arrays.asList("sally", "bob", "adam", "harry", "chris", "vivian", "emily", "jordan")); put("adam", Arrays.asList("david", "carissa")); put("mike", Arrays.asList("john", "bob")); put("john", Arrays.asList("alice", "nathan", "jim", "mike", "bob")); } }; /** * @Title: ReachTopology.java * @Package com.lky.topology * @Description: 获取包含该特定url的所有用户,随机发放到下游bolt中 * @author lky * @date 2015年10月23日 下午11:46:19 * @version V1.0 */ @SuppressWarnings("serial") public static class GetTweeters extends BaseBasicBolt { @Override public void execute(Tuple input, BasicOutputCollector collector) { Object id = null; String url = null; try { id = input.getValue(0); url = input.getString(1); List<String> tweeters = new ArrayList<String>();//获取包含该url的所有用户 if (null != id && null != url) { tweeters = TWEETERS_DB.get(url); if (null != tweeters) { for (String tweeter : tweeters) { log.info("execute1------>[id = " + id + " ]["+url+"---->tweeter=" + tweeter + "]"); collector.emit(new Values(id, tweeter)); } } } } catch (Exception e) { log.error("execute 发射消息错误!!!!!"); } } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "tweeter")); } } /** * @Title: ReachTopology.java * @Package com.lky.topology * @Description:获取每一个用户的粉丝,然后按字段分组(id,fllower)到下游bolt中,保证同一类url的相同用户被分到相同的批次 * @author lky * @date 2015年10月23日 下午11:47:45 * @version V1.0 */ @SuppressWarnings("serial") public static class GetFollowers extends BaseBasicBolt { @Override public void execute(Tuple input, BasicOutputCollector collector) { Object id = null; String _follower = null; try { id = input.getValue(0); _follower = input.getString(1); List<String> followers = new ArrayList<String>(); if (null != id && null != _follower) { followers = FOLLOWERS_DB.get(_follower);//获取该用户的所有粉丝 if (null != followers) { for (String follower : followers) { log.info("execute2------>[id = " + id + " ]["+_follower+"------>follower=" + follower + "]"); collector.emit(new Values(id, follower)); } } } } catch (Exception e) { log.error("execute 发射消息异常!!!"); } } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "follower")); } } /** * @Title: ReachTopology.java * @Package com.lky.topology * @Description: 按批次统计粉丝数量 * @author lky * @date 2015年10月23日 下午11:50:51 * @version V1.0 */ @SuppressWarnings({ "serial", "rawtypes" }) public static class PartialUniquer extends BaseBatchBolt { private BatchOutputCollector collector; private Object id; private Set<String> _followerSet = new HashSet<String>(); @Override public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, Object id) { this.collector = collector; this.id = id; } @Override public void execute(Tuple tuple) { String uname = null; try { uname = tuple.getString(1); if (null != uname) { log.info("execute3------>[id = " + tuple.getValue(0) + " ][ uname=" + uname + "]"); _followerSet.add(uname); } } catch (Exception e) { log.error("execute 接收消息异常!!!"); } } @Override public void finishBatch() { log.info("execute4------>[id = " + id + " ][ size=" + _followerSet.size() + "]"); collector.emit(new Values(id, _followerSet.size())); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "count")); } } /** * @Title: ReachTopology.java * @Package com.lky.topology * @Description: 按相同的id汇总批次 * @author lky * @date 2015年10月23日 下午11:51:49 * @version V1.0 */ @SuppressWarnings({ "serial", "rawtypes" }) public static class CountAggregator extends BaseBatchBolt { private BatchOutputCollector collector; private Object id; private int _count = 0; @Override public void prepare(Map conf, TopologyContext context, BatchOutputCollector collector, Object id) { this.collector = collector; this.id = id; } @Override public void execute(Tuple tuple) { Integer count = null; try { count = tuple.getInteger(1); log.info("execute5------>[id = " + tuple.getValue(0) + " ][ count=" + count + "]"); _count += count; } catch (Exception e) { log.error("execute 接收消息异常"); } } @Override public void finishBatch() { collector.emit(new Values(id, _count)); } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("id", "result")); } } @SuppressWarnings("deprecation") public static LinearDRPCTopologyBuilder construct() { LinearDRPCTopologyBuilder builder = new LinearDRPCTopologyBuilder("reach"); builder.addBolt(new GetTweeters(), 4); builder.addBolt(new GetFollowers(), 12).shuffleGrouping(); builder.addBolt(new PartialUniquer(), 6).fieldsGrouping(new Fields("id", "follower")); builder.addBolt(new CountAggregator(), 3).fieldsGrouping(new Fields("id")); return builder; } @SuppressWarnings("deprecation") public static void main(String[] args) { LinearDRPCTopologyBuilder builder = construct(); Config conf = new Config(); conf.setMaxTaskParallelism(3); LocalDRPC drpc = new LocalDRPC(); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("reach-drpc", conf, builder.createLocalTopology(drpc)); String[] urlsToTry = new String[] { "foo.com/blog/1", "engineering.twitter.com/blog/5", "notaurl.com" }; for (String url : urlsToTry) { System.out.println("Reach of " + url + ": " + drpc.execute("reach", url)); } Utils.sleep(1000 * 10); cluster.shutdown(); drpc.shutdown(); } }
时间: 2024-10-21 14:37:34