在Eclipse上安装Hadoop2.2插件并测试开发

一、前提工作

Hadoop搭建完成

二、开发环境

Win7 64位

Eclipse3.4.2

Hadoop2.2

三、开工

1，本地解压hadoop-2.2.0.tar.gz，配置HADOOP_HOME环境变量，并配置%HADOOP_HOME%\bin到Path中。

2，下载hadoop-common-2.2.0-bin-master.zip，路径：

https://github.com/srccodes/hadoop-common-2.2.0-bin

将压缩包解压，把其中bin目录下的文件（winutils.exe、hadoop.dll等）复制到%HADOOP_HOME%\bin目录下。

3，打开Eclipse，细节配置如下

四、开发测试工程

1，测试代码，可以选择WordCount。

package test;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;  

public class WordCountTest {

    /**
    * MapReduceBase类:实现了Mapper和Reducer接口的基类（其中的方法只是实现接口，而未作任何事情）
    * Mapper接口：
    * WritableComparable接口：实现WritableComparable的类可以相互比较。所有被用作key的类应该实现此接口。
    * Reporter 则可用于报告整个应用的运行进度，本例中未使用。
    *
    */
 public static class TokenizerMapper
      extends Mapper<Object, Text, Text, IntWritable>{  

     /**
      * LongWritable, IntWritable, Text 均是 Hadoop 中实现的用于封装 Java 数据类型的类，这些类实现了WritableComparable接口，
      * 都能够被串行化从而便于在分布式环境中进行数据交换，你可以将它们分别视为long,int,String 的替代品。
      */
   private final static IntWritable one = new IntWritable(1);
   private Text word = new Text();//Text 实现了BinaryComparable类可以作为key值  

   /**
    * Mapper接口中的map方法：
    * void map(K1 key, V1 value, OutputCollector<K2,V2> output, Reporter reporter)
    * 映射一个单个的输入k/v对到一个中间的k/v对
    * 输出对不需要和输入对是相同的类型，输入对可以映射到0个或多个输出对。
    * OutputCollector接口：收集Mapper和Reducer输出的<k,v>对。
    * OutputCollector接口的collect(k, v)方法:增加一个(k,v)对到output
    */    

   public void map(Object key, Text value, Context context) throws IOException, InterruptedException {  

       /**
        * 原始数据：
        * c++ java hello
           world java hello
           you me too
           map阶段，数据如下形式作为map的输入值：key为偏移量
           0  c++ java hello
           16 world java hello
           34 you me too 

        */  

        /**
         * 以下解析键值对
        * 解析后以键值对格式形成输出数据
        * 格式如下：前者是键排好序的，后者数字是值
        * c++ 1
        * java 1
        * hello 1
        * world 1
        * java 1
        * hello 1
        * you 1
        * me 1
        * too 1
        * 这些数据作为reduce的输出数据
        */
     StringTokenizer itr = new StringTokenizer(value.toString());//得到什么值
     System.out.println("value什么东西 ： "+value.toString());
     System.out.println("key什么东西 ： "+key.toString());  

     while (itr.hasMoreTokens()) {
       word.set(itr.nextToken());  

       context.write(word, one);
     }
   	}
   }
 public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
	    private IntWritable result = new IntWritable();
	    /**
	     * reduce过程是对输入数据解析形成如下格式数据：
	     * (c++ [1])
	     * (java [1,1])
	     * (hello [1,1])
	     * (world [1])
	     * (you [1])
	     * (me [1])
	     * (you [1])
	     * 供接下来的实现的reduce程序分析数据数据
	     *
	     */
	    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
	      int sum = 0;
	      /**
	       * 自己的实现的reduce方法分析输入数据
	       * 形成数据格式如下并存储
	       *     c++    1
	       *    hello   2
	       *    java    2
	       *    me      1
	       *    too     1
	       *    world   1
	       *    you     1
	       *
	       */
	      for (IntWritable val : values) {
	        sum += val.get();
	      }  

	      result.set(sum);
	      context.write(key, result);
	    }
	  }  

 public static void main(String[] args) throws Exception {  

     /**
      * JobConf：map/reduce的job配置类，向hadoop框架描述map-reduce执行的工作
      * 构造方法：JobConf()、JobConf(Class exampleClass)、JobConf(Configuration conf)等
      */
   //根据自己的实际情况填写输入分析的目录和结果输出的目录
   args = new String[2];
   args[0] = "hdfs://192.168.13.33:9000/in";
   args[1] = "hdfs://192.168.13.33:9000/out5";

   Configuration conf = new Configuration();  

//	conf.set("fs.defaultFS", "hdfs://Master.Hadoop:9000");
//	conf.set("hadoop.job.user","root");
//	conf.set("mapreduce.framework.name","yarn");
//	//conf.set("mapred.job.tracker","192.168.1.187:9001"); 用下面的设置而不用该设置，该设置是旧版本的设置，自己用的是hadoop2.3.0，查看官方配置文档后发现里面用的是下面mapreduce.jobtracker.address的配置地址
//	conf.set("mapreduce.jobtracker.address","192.168.13.33:9001");
//	conf.set("yarn.resourcemanager.hostname", "master.hadoop");
//	conf.set("yarn.resourcemanager.admin.address", "192.168.13.33:8033");
//	conf.set("yarn.resourcemanager.address", "192.168.13.33:8032");
//	conf.set("yarn.resourcemanager.resource-tracker.address", "192.168.13.33:8031");
//	conf.set("yarn.resourcemanager.scheduler.address", "192.168.13.33:8030");

   String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
   for(String s : otherArgs){
	   System.out.println(s);
   }

   //这里需要配置参数即输入和输出的HDFS的文件路径
   if (otherArgs.length != 2) {
     System.err.println("Usage: wordcount <in> <out>");
     System.exit(2);
   }
  // JobConf conf1 = new JobConf(WordCount.class);
   Job job = new Job(conf, "word count");//Job(Configuration conf, String jobName) 设置job名称和
   job.setJarByClass(WordCountTest.class);
   job.setMapperClass(TokenizerMapper.class); //为job设置Mapper类
   job.setCombinerClass(IntSumReducer.class); //为job设置Combiner类
   job.setReducerClass(IntSumReducer.class); //为job设置Reduce类
   job.setOutputKeyClass(Text.class);        //设置输出key的类型
   job.setOutputValueClass(IntWritable.class);//  设置输出value的类型
   FileInputFormat.addInputPath(job, new Path(otherArgs[0])); //为map-reduce任务设置InputFormat实现类   设置输入路径  

   FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//为map-reduce任务设置OutputFormat实现类  设置输出路径
   System.exit(job.waitForCompletion(true) ? 0 : 1);
 }  

}

2，右键，选择Run On Hadoop。

3，下面控制台会输出运行日志；运行结束后，可通过HDFS查看输出目录和其中的out文件。

参考：

hadoop2.2 学习3 在eclipse 上安装hadoop插件

http://blog.163.com/gibby_l/blog/static/8300316120140180555754/

eclipse中开发Hadoop2.x的Map/Reduce项目

http://www.micmiu.com/bigdata/hadoop/hadoop2x-eclipse-mapreduce-demo/

配置Hadoop开发环境（Eclipse）

http://blog.csdn.net/zythy/article/details/17397153

Hadoop学习三十：Win7 Eclipse调试Centos Hadoop2.2-Mapreduce

http://www.tuicool.com/articles/AjUZrq

hadoop-common-2.2.0-bin

https://github.com/srccodes/hadoop-common-2.2.0-bin/tree/master/bin

时间： 2024-10-25 04:33:26

在Eclipse上安装Hadoop2.2插件并测试开发的相关文章

Eclipse上安装GIT插件EGit及使用

Eclipse上安装GIT插件EGit及使用博客分类: GIT 一.Eclipse上安装GIT插件EGit Eclipse的版本eclipse-java-helios-SR2-win32.zip(在Eclipse3.3版本找不到对应的 EGit插件,无法安装) EGit插件地址:http://download.eclipse.org/egit/updates OK,随后连续下一步默认安装就可以,安装后进行重启Eclipse 二.在Eclipse中配置EGit 准备工作:需要在https://g

【转】Eclipse上安装GIT插件EGit及使用

http://yufenfei.iteye.com/blog/1750124 一.Eclipse上安装GIT插件EGit Eclipse的版本eclipse-java-helios-SR2-win32.zip(在Eclipse3.3版本找不到对应的 EGit插件,无法安装) EGit插件地址:http://download.eclipse.org/egit/updates OK,随后连续下一步默认安装就可以,安装后进行重启Eclipse 二.在Eclipse中配置EGit 准备工作:需要在htt

Eclipse上安装GIT插件EGit

一.Eclipse上安装GIT插件EGit Eclipse的版本eclipse-java-helios-SR2-win32.zip(在Eclipse3.3版本找不到对应的 EGit插件,无法安装) EGit插件地址:http://download.eclipse.org/egit/updates OK,随后连续下一步默认安装就可以,安装后进行重启Eclipse 二.在Eclipse中配置EGit 准备工作:需要在https://github.com 上注册账号 Preferences > Tea

eclipse上安装abator插件

下面是我看了网上的有一点需要强调:网址 http://ibatis.apache.org/tools/abator然后全选,然后是==>重启就好了 eclipse上安装abator插件参考:http://www.blogjava.net/beansoft/archive/2008/03/31/189795.html在eclipse上安装abator插件eclipse菜单栏 --> help --> Software Updates --> Find And Install... 在

Activiti基础教程--01（简介、代码生成Activiti的25张表、Activiti配置文件activiti.cfg.xml生成25张表、在Eclipse上安装Activiti插件）

一.简介 Activiti项目是一项新的基于Apache许可的开源BPM平台,从基础开始构建,旨在提供支持新的BPMN 2.0标准,包括支持对象管理组(OMG),面对新技术的机遇,诸如互操作性和云架构,提供技术实现. 创始人Tom Baeyens是JBoss jBPM的项目架构师,以及另一位架构师Joram Barrez,一起加入到创建Alfresco这项首次实现Apache开源许可的BPMN 2.0引擎开发中来. Activiti是一个独立运作和经营的开源项目品牌,并将独立于Alfresco开

Eclipse上安装GIT插件EGit及使用（转）

[Eclipse][SVN] 在eclipse上安装SVN

以前装过好多次SVN,始终没有一次把安装过程记录下来,这次新装机器,安装SVN插件时一波三折,记录下来免得以后又忘记了. 方法一: 1. 直接通过后台添加URL通过互联网进行安装,直接上图: 2. 填写相应的版本的插件url(1.6: http://subclipse.tigris.org/update_1.6.x, 1.8: http://subclipse.tigris.org/update_1.8.x),这里要十分注意,这个URL里的版本信息一定要跟本地安装的windows svn版本

在Eclipse上安装Spring Tool Suite

Spring Tool Suite介绍 spring Tool Suite是一个基于Eclipse IDE开发环境中的用于开发Spring应用程序的工具,提供了开箱即用的环境用于实现.调试和部署你的Spring应用,包括为关键的服务器和云计算.Git.Maven.AspectJ和最新的Eclipse版本提供整合支持. Eclipse集成Spring Tool Suite插件有两种方式,一个是在线安装,一个是本地安装,两种方法大同小异.这里就以本地安装为例进行说明. 在Eclipse上安装Spri

eclipse下安装Extjs的插件spket

最近项目要用ext进行开发,所以这段时间开始学习ext. 我这里用的是ext3.0,eclipse3.5. 每次都要去查API,很烦,所以装个EXT提示的插件对初学者来说有很大的帮助. 假设你已经下载了ext3.0和spket的源码. 注意,官方的ext3.0里面没有提供ext.jsb文件.得自己另外去网上下载一个,这里我把下载到的ext.jsb文件放在ext-3.0.0的根目录下. 一. 以Eclipse插件形式安装启动ECLIPSE Help → Software Updates → Fi