Storm入门(四)WordCount示例

Storm API文档网址如下:

http://storm.apache.org/releases/current/javadocs/index.html

一、关联代码

使用maven,代码如下。

pom.xml  和Storm入门(三)HelloWorld示例相同

RandomSentenceSpout.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cn.ljh.storm.wordcount;

import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.Random;

public class RandomSentenceSpout extends BaseRichSpout {
  private static final Logger LOG = LoggerFactory.getLogger(RandomSentenceSpout.class);

  SpoutOutputCollector _collector;
  Random _rand;

  public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;
    _rand = new Random();
  }

  public void nextTuple() {
    Utils.sleep(100);
    String[] sentences = new String[]{
            sentence("the cow jumped over the moon"),
            sentence("an apple a day keeps the doctor away"),
            sentence("four score and seven years ago"),
            sentence("snow white and the seven dwarfs"),
            sentence("i am at two with nature")};
    final String sentence = sentences[_rand.nextInt(sentences.length)];

    LOG.debug("Emitting tuple: {}", sentence);

    _collector.emit(new Values(sentence));
  }

  protected String sentence(String input) {
    return input;
  }

  @Override
  public void ack(Object id) {
  }

  @Override
  public void fail(Object id) {
  }

  public void declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("word"));
  }

  // Add unique identifier to each tuple, which is helpful for debugging
  public static class TimeStamped extends RandomSentenceSpout {
    private final String prefix;

    public TimeStamped() {
      this("");
    }

    public TimeStamped(String prefix) {
      this.prefix = prefix;
    }

    protected String sentence(String input) {
      return prefix + currentDate() + " " + input;
    }

    private String currentDate() {
      return new SimpleDateFormat("yyyy.MM.dd_HH:mm:ss.SSSSSSSSS").format(new Date());
    }
  }
}

WordCountTopology.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cn.ljh.storm.wordcount;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class WordCountTopology {
  public static class SplitSentence implements IRichBolt {
    private OutputCollector _collector;
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
      declarer.declare(new Fields("word"));
    }

    public Map<String, Object> getComponentConfiguration() {
      return null;
    }

    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        _collector = collector;
    }

    public void execute(Tuple input) {
        String sentence = input.getStringByField("word");
        String[] words = sentence.split(" ");
        for(String word : words){
            this._collector.emit(new Values(word));
        }
    }

    public void cleanup() {
        // TODO Auto-generated method stub

    }
  }

  public static class WordCount extends BaseBasicBolt {
    Map<String, Integer> counts = new HashMap<String, Integer>();

    public void execute(Tuple tuple, BasicOutputCollector collector) {
      String word = tuple.getString(0);
      Integer count = counts.get(word);
      if (count == null)
        count = 0;
      count++;
      counts.put(word, count);
      collector.emit(new Values(word, count));
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
      declarer.declare(new Fields("word", "count"));
    }
  }

  public static class WordReport extends BaseBasicBolt {
        Map<String, Integer> counts = new HashMap<String, Integer>();

        public void execute(Tuple tuple, BasicOutputCollector collector) {
          String word = tuple.getStringByField("word");
          Integer count = tuple.getIntegerByField("count");
          this.counts.put(word, count);
        }

        public void declareOutputFields(OutputFieldsDeclarer declarer) {

        }

        @Override
        public void cleanup() {
            System.out.println("-----------------FINAL COUNTS  START-----------------------");
            List<String> keys = new ArrayList<String>();
            keys.addAll(this.counts.keySet());
            Collections.sort(keys);

            for(String key : keys){
                System.out.println(key + " : " + this.counts.get(key));
            }

            System.out.println("-----------------FINAL COUNTS  END-----------------------");
        } 

      }

  public static void main(String[] args) throws Exception {

    TopologyBuilder builder = new TopologyBuilder();

    builder.setSpout("spout", new RandomSentenceSpout(), 5);

    //ShuffleGrouping:随机选择一个Task来发送。
    builder.setBolt("split", new SplitSentence(), 8).shuffleGrouping("spout");
    //FiledGrouping:根据Tuple中Fields来做一致性hash,相同hash值的Tuple被发送到相同的Task。
    builder.setBolt("count", new WordCount(), 12).fieldsGrouping("split", new Fields("word"));
    //GlobalGrouping:所有的Tuple会被发送到某个Bolt中的id最小的那个Task。
    builder.setBolt("report", new WordReport(), 6).globalGrouping("count");

    Config conf = new Config();
    conf.setDebug(true);

    if (args != null && args.length > 0) {
      conf.setNumWorkers(3);

      StormSubmitter.submitTopologyWithProgressBar(args[0], conf, builder.createTopology());
    }
    else {
      conf.setMaxTaskParallelism(3);

      LocalCluster cluster = new LocalCluster();
      cluster.submitTopology("word-count", conf, builder.createTopology());

      Thread.sleep(20000);

      cluster.shutdown();
    }
  }
}

二、执行效果

时间: 2024-10-19 16:51:29

Storm入门(四)WordCount示例的相关文章

【第四篇】ASP.NET MVC快速入门之完整示例(MVC5+EF6)

目录 [第一篇]ASP.NET MVC快速入门之数据库操作(MVC5+EF6) [第二篇]ASP.NET MVC快速入门之数据注解(MVC5+EF6) [第三篇]ASP.NET MVC快速入门之安全策略(MVC5+EF6) [第四篇]ASP.NET MVC快速入门之完整示例(MVC5+EF6) [番外篇]ASP.NET MVC快速入门之免费jQuery控件库(MVC5+EF6) 请关注三石的博客:http://cnblogs.com/sanshi 完善数据注解 到目前为止的表格页面效果: 我们需

【转载】Lucene.Net入门教程及示例

本人看到这篇非常不错的Lucene.Net入门基础教程,就转载分享一下给大家来学习,希望大家在工作实践中可以用到. 一.简单的例子 //索引Private void Index(){    IndexWriter writer = new IndexWriter(@"E:\Index", new StandardAnalyzer());    Document doc = new Document();    doc.Add(new Field("Text",&qu

【原创】NIO框架入门(四):Android与MINA2、Netty4的跨平台UDP双向通信实战

概述 本文演示的是一个Android客户端程序,通过UDP协议与两个典型的NIO框架服务端,实现跨平台双向通信的完整Demo. 当前由于NIO框架的流行,使得开发大并发.高性能的互联网服务端成为可能.这其中最流行的无非就是MINA和Netty了,MINA目前的主要版本是MINA2.而Netty的主要版本是Netty3和Netty4(Netty5已经被取消开发了:详见此文). 本文中,服务端将分别用MINA2和Netty4进行实现,但在你实际的项目中服务端实现只需选其一就行了.本文中的Demo同时

Spark教程-构建Spark集群-配置Hadoop伪分布模式并运行Wordcount示例(1)

第四步:配置Hadoop伪分布模式并运行Wordcount示例 伪分布模式主要涉及一下的配置信息: 修改Hadoop的核心配置文件core-site.xml,主要是配置HDFS的地址和端口号: 修改Hadoop中HDFS的配置文件hdfs-site.xml,主要是配置replication; 修改Hadoop的MapReduce的配置文件mapred-site.xml,主要是配置JobTracker的地址和端口: 在具体操作前我们先在Hadoop目录下创建几个文件夹: 下面开始构建具体的伪分布式

Java使用Protocol Buffers入门四步骤

Protocol Buffers(简称protobuf)是谷歌的一项技术,用于将结构化的数据序列化.反序列化,经常用于网络传输. 这货实际上类似于XML生成和解析,但protobuf的效率高于XML,不过protobuf生成的是字节码,可读性比XML差.类似的还有json.Java的Serializable等. protobuf支持各种语言.本文以Java为例,简单介绍protobuf如何使用.其他语言使用方法类似. 首先需要下载: http://download.csdn.net/downlo

Thinkphp入门 四 —布局、缓存、系统变量 (48)

原文:Thinkphp入门 四 -布局.缓存.系统变量 (48) [控制器操作方法参数设置] http://网址/index.php/控制器/操作方法 [页面跳转] [变量调节器] Smarty变量调节器 TP变量调节器:普通的php函数 (count  strlen   str_replace) 定义:前者的输出,是后者的输入 [子模板包含] 当前模块彼此包含 <include  file=”模板名称”  /> [使用布局layout] 1. 开启布局,配置变量信息config.php 2.

Redbean:入门(四) - 反射机制 以及 事务

<?php //引入rb入口文件 include_once 'rb.php'; //定义dsn以及相关的数据 $dsn = 'mysql:host=localhost;dbname=hwibs_model'; $user = 'root'; $pass = ''; $table = 'link'; //链接数据库 R::setup($dsn,$user,$pass); //链接数据表 $handler = R::dispense($table); //inspect::反射表,将表的字段结构返回

Hadoop MapReduce 官方教程 -- WordCount示例

Hadoop MapReduce 官方教程 -- WordCount示例: http://hadoop.apache.org/docs/r1.0.4/cn/mapred_tutorial.html#%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v1.0

[WebGL入门]四,渲染准备

注:文章译自http://wgld.org/,原作者杉本雅広(doxas),文章中如果有我的额外说明,我会加上[lufy:],另外,鄙人webgl研究还不够深入,一些专业词语,如果翻译有误,欢迎大家指正. 必须准备的东西 上次介绍了3D绘图的基础知识.讲了一下由Z坐标的不同决定的两种坐标系,以及坐标变换的种类.这一次,说一说实际WebGL绘图的时候必须准备的东西. 首先,HTML,javascript相关的基础知识就不解释了.如果,有不明白的单词或概念的话,请自己查一下.我是认为你有一定的HTM