

可以参考 [linux]ubuntu下安装hadoop 和 [linux]ubuntu12.04下安装jdk1.7




16 package org.apache.hadoop.examples;
18 import java.io.IOException;
19 import java.util.StringTokenizer;
21 import org.apache.hadoop.conf.Configuration;
22 import org.apache.hadoop.fs.Path;
23 import org.apache.hadoop.io.IntWritable;
24 import org.apache.hadoop.io.Text;
25 import org.apache.hadoop.mapreduce.Job;
26 import org.apache.hadoop.mapreduce.Mapper;
27 import org.apache.hadoop.mapreduce.Reducer;
28 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
29 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
30 import org.apache.hadoop.util.GenericOptionsParser;
32 public class WordCount {
34   public static class TokenizerMapper
35        extends Mapper<Object, Text, Text, IntWritable>{
37     private final static IntWritable one = new IntWritable(1);
38     private Text word = new Text();
40     public void map(Object key, Text value, Context context
41                     ) throws IOException, InterruptedException {
42       StringTokenizer itr = new StringTokenizer(value.toString());
43       while (itr.hasMoreTokens()) {
44         word.set(itr.nextToken());
45         context.write(word, one);
46       }
47     }
48   }
50   public static class IntSumReducer
51        extends Reducer<Text,IntWritable,Text,IntWritable> {
52     private IntWritable result = new IntWritable();
54     public void reduce(Text key, Iterable<IntWritable> values,
55                        Context context
56                        ) throws IOException, InterruptedException {
57       int sum = 0;
58       for (IntWritable val : values) {
59         sum += val.get();
60       }
61       result.set(sum);
62       context.write(key, result);
63     }
64   }
66   public static void main(String[] args) throws Exception {
67     Configuration conf = new Configuration();
68     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
69     if (otherArgs.length != 2) {
70       System.err.println("Usage: wordcount <in> <out>");
71       System.exit(2);
72     }
73     Job job = new Job(conf, "word count");
74     job.setJarByClass(WordCount.class);
75     job.setMapperClass(TokenizerMapper.class);
76     job.setCombinerClass(IntSumReducer.class);
77     job.setReducerClass(IntSumReducer.class);
78     job.setOutputKeyClass(Text.class);
79     job.setOutputValueClass(IntWritable.class);
80     FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
81     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
82     System.exit(job.waitForCompletion(true) ? 0 : 1);
83   }
84 }


mkdir hadoop-1.2.1/classes



javac WordCount.java -d .


for f in $hadoop_HOME/hadoop-*.jar; do
  hadoop_CLASSPATH=${hadoop_CLASSPATH}:$f
done

for f in $hadoop_HOME/lib/*.jar; do
  hadoop_CLASSPATH=${hadoop_CLASSPATH}:$f
done


 1 Usage: javac <options> <source files>
 2 where possible options include:
 3   -g                         Generate all debugging info
 4   -g:none                    Generate no debugging info
 5   -g:{lines,vars,source}     Generate only some debugging info
 6   -nowarn                    Generate no warnings
 7   -verbose                   Output messages about what the compiler is doing
 8   -deprecation               Output source locations where deprecated APIs are used
 9   -classpath <path>          Specify where to find user class files and annotation processors
10   -cp <path>                 Specify where to find user class files and annotation processors
11   -sourcepath <path>         Specify where to find input source files
12   -bootclasspath <path>      Override location of bootstrap class files
13   -extdirs <dirs>            Override location of installed extensions
14   -endorseddirs <dirs>       Override location of endorsed standards path
15   -proc:{none,only}          Control whether annotation processing and/or compilation is done.
16   -processor <class1>[,<class2>,<class3>...] Names of the annotation processors to run; bypasses default discovery process
17   -processorpath <path>      Specify where to find annotation processors
18   -d <directory>             Specify where to place generated class files
19   -s <directory>             Specify where to place generated source files
20   -implicit:{none,class}     Specify whether or not to generate class files for implicitly referenced files
21   -encoding <encoding>       Specify character encoding used by source files
22   -source <release>          Provide source compatibility with specified release
23   -target <release>          Generate class files for specific VM version
24   -version                   Version information
25   -help                      Print a synopsis of standard options
26   -Akey[=value]              Options to pass to annotation processors
27   -X                         Print a synopsis of nonstandard options
28   -J<flag>                   Pass <flag> directly to the runtime system
29   -Werror                    Terminate compilation if warnings occur
30   @<filename>                Read options and filenames from file


1 -classpath <path>          Specify where to find user class files and annotation processors
2 -cp <path>                 Specify where to find user class files and annotation processors


javac -cp $hadoop_CLASSPATH WordCount.java -d .

编译成功后,classes文件夹中出现了一个名为org的文件夹,进入后可以发现目录层次是org/apache/hadoop/examples,在examples文件夹中可以看到三个.class文件:

[email protected] ~/hadoop-1.2.1/classes/org/apache/hadoop/examples $ pwd
[email protected] ~/hadoop-1.2.1/classes/org/apache/hadoop/examples $ ls
WordCount.class  WordCount$IntSumReducer.class  WordCount$TokenizerMapper.class

出现这种目录层次的原因是源代码的开头有一条包声明:package org.apache.hadoop.examples;



jar -cvf WordCount.jar org

然后当前文件夹就会出现WordCount.jar文件,可以使用jar -tvf WordCount.jar看一下这个包的层次结构

jar -tvf WordCount.jar
0 Fri Aug 15 19:58:32 CST 2014 META-INF/
68 Fri Aug 15 19:58:32 CST 2014 META-INF/MANIFEST.MF
0 Fri Aug 15 19:53:28 CST 2014 org/
0 Fri Aug 15 19:53:28 CST 2014 org/apache/
0 Fri Aug 15 19:53:28 CST 2014 org/apache/hadoop/
0 Fri Aug 15 19:53:28 CST 2014 org/apache/hadoop/examples/
1911 Fri Aug 15 19:53:28 CST 2014 org/apache/hadoop/examples/WordCount.class
1790 Fri Aug 15 19:53:28 CST 2014 org/apache/hadoop/examples/WordCount$TokenizerMapper.class
1793 Fri Aug 15 19:53:28 CST 2014 org/apache/hadoop/examples/WordCount$IntSumReducer.class


hadoop jar WordCount.jar org.apache.hadoop.examples.WordCount input output
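注意:WordCount类位于包org.apache.hadoop.examples中,必须像上面那样写完整类名;下面这种只写类名的写法会因找不到类而报ClassNotFoundException:
hadoop jar WordCount.jar WordCount input output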

jar包的名字不一定要与主类同名,可以是CountWord.jar或者其他名字;但命令的格式必须是:hadoop jar <jar包名> <程序主类的完整类名> <输入文件夹> <输出文件夹>





