MapReduce: A Small Example

Here are two small examples of map and reduce operations, written in Eclipse and run on Hadoop. Input file requirement: each line may contain multiple fields, and the fields must be separated by spaces.
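As a hypothetical illustration (the values below are made up), an input file for the first example might look like this; each line needs at least four space-separated fields, because the mapper reads fields 0, 2, and 3:

Tom 20 1001 SchoolA
Jerry 21 1002 SchoolB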
Test_1:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Test_1 extends Configured implements Tool {
    /**
     * @Author XD 2014-8-15
     */
    enum Counter {
        LINESKIP,   // counts input lines that fail to parse
    }

    public static class Map extends Mapper<LongWritable, Text, NullWritable, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // each call receives one line of the input file
            String line = value.toString();
            try {
                // split the line into space-separated fields
                String[] lineSplit = line.split(" ");
                String name = lineSplit[0];
                String id = lineSplit[2];
                String school = lineSplit[3];
                Text out = new Text(name + ' ' + id + ' ' + school + ' ');
                // key/value types must match the Mapper's declared output types
                context.write(NullWritable.get(), out);
            } catch (ArrayIndexOutOfBoundsException e) {
                // a line with too few fields is skipped and counted
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }

    public int run(String[] args) throws Exception {
        // configure and initialize the job
        Configuration conf = getConf();
        Job job = new Job(conf, "Test_1");
        job.setJarByClass(Test_1.class);

        // input and output paths
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // map class and output format
        job.setMapperClass(Map.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // output types of the job
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);

        System.out.println("Job name: " + job.getJobName());
        System.out.println("Job successful: " + (job.isSuccessful() ? "Yes" : "No"));
        System.out.println("Lines skipped: "
                + job.getCounters().findCounter(Counter.LINESKIP).getValue());
        return job.isSuccessful() ? 0 : 1;
    }

    public static void main(String[] args) {
        try {
            int result = ToolRunner.run(new Configuration(), new Test_1(), args);
            System.out.println(result);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
This example defines no reduce function; it is just a simple map operation (the framework's default identity reducer still runs and passes the map output straight through).
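Given the hypothetical input above, each map call emits "name id school " under a NullWritable key. TextOutputFormat omits NullWritable keys, so the output file (part-r-00000, since the default identity reducer runs with one reduce task) would contain lines like the following, each with a trailing space appended by the mapper:

Tom 1001 SchoolA
Jerry 1002 SchoolB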
Test_2:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Test_2 extends Configured implements Tool {
    /**
     * @Author XD 2014-8-15
     */
    enum Counter {
        LINESKIP,   // counts input lines that fail to parse
    }

    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // each call receives one line of the input file
            String line = value.toString();
            try {
                String[] lineSplit = line.split(" ");
                String anum = lineSplit[0];
                String bnum = lineSplit[1];
                // invert the pair so records are grouped by the second field
                context.write(new Text(bnum), new Text(anum));
            } catch (ArrayIndexOutOfBoundsException e) {
                // a line with too few fields is skipped and counted
                context.getCounter(Counter.LINESKIP).increment(1);
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // concatenate all values grouped under this key, separated by '|'
            StringBuilder out = new StringBuilder();
            for (Text value : values) {
                out.append(value.toString()).append('|');
            }
            // reduce output
            context.write(key, new Text(out.toString()));
        }
    }

    public int run(String[] args) throws Exception {
        // configure and initialize the job
        Configuration conf = getConf();
        Job job = new Job(conf, "Test_2");
        job.setJarByClass(Test_2.class);

        // input and output paths
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // map and reduce classes
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // output types of the reduce phase
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);

        System.out.println("Job name: " + job.getJobName());
        System.out.println("Job successful: " + (job.isSuccessful() ? "Yes" : "No"));
        System.out.println("Lines skipped: "
                + job.getCounters().findCounter(Counter.LINESKIP).getValue());
        return job.isSuccessful() ? 0 : 1;
    }

    public static void main(String[] args) {
        try {
            int result = ToolRunner.run(new Configuration(), new Test_2(), args);
            System.out.println(result);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
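To make the data flow concrete, here is a hypothetical run (all numbers below are made up). Suppose the input pairs are:

13599999999 10086
13899999999 120
13722222222 10086

The mapper inverts each pair so the records are grouped by the second field, and the reducer concatenates all first fields under each key. The output would then contain lines like these (key and value separated by TextOutputFormat's default tab; note the trailing '|' left by the loop):

10086	13599999999|13722222222|
120	13899999999|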
This example exercises both the map and the reduce operation. It is only a small example meant to verify that the runtime environment works and does not demonstrate anything beyond that. Writing the program can be done in Eclipse, which makes it easy to modify and debug, but for actually running the job it is recommended to enter the command in a terminal.
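A minimal sketch of such a terminal run, assuming the project has been exported from Eclipse as Test_2.jar (the jar name, file names, and HDFS paths below are all hypothetical):

hadoop fs -mkdir input                        # create an input directory in HDFS
hadoop fs -put local_input.txt input          # upload the local input file
hadoop jar Test_2.jar Test_2 input output     # submit the job; args[0] = input, args[1] = output
hadoop fs -cat output/part-r-00000            # inspect the reduce output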