今天我先搭建了环境
然后参考并修改了网上的代码,分为两个类。LogBean.java:对数据进行封装打包。
package org.apache.hadoop.examples;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;

/**
 * Hadoop-serializable value object holding the five string fields extracted from one log record:
 * {@code id}, {@code time}, {@code traffic}, {@code book} and {@code voide}.
 *
 * <p>Fields are serialized in the fixed order id, time, traffic, book, voide; {@link #write} and
 * {@link #readFields} must stay in sync with each other.
 */
public class LogBean implements WritableComparable<LogBean> {

    private String id;
    private String time;
    private String traffic;
    private String book;
    private String voide;

    /** No-argument constructor; fields stay {@code null} until set or until {@link #readFields} runs. */
    public LogBean() {
        super();
    }

    /**
     * Creates a fully populated bean.
     *
     * <p>Note the parameter order: {@code traffic} comes BEFORE {@code time}.
     *
     * @param id      record id
     * @param traffic traffic field
     * @param time    time field
     * @param book    book field
     * @param voide   voide field
     */
    public LogBean(String id, String traffic, String time, String book, String voide) {
        this.time = time;
        this.id = id;
        this.traffic = traffic;
        this.book = book;
        this.voide = voide;
    }

    /** Returns a single-line, human-readable rendering of all five fields. */
    @Override
    public String toString() {
        return "LogBean{"
                + "id='" + id + '\''
                + ", time='" + time + '\''
                + ", traffic='" + traffic + '\''
                + ", book='" + book + '\''
                + ", voide='" + voide + '\''
                + '}';
    }

    public String getTime() {
        return time;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public String getId() {
        return id;
    }

    /** Sets the id. */
    public void setId(String id) {
        this.id = id;
    }

    /** Sets the id. Kept under its historical name for existing callers; same as {@link #setId}. */
    public void setIp(String id) {
        this.id = id;
    }

    public String getTraffic() {
        return traffic;
    }

    public void setTraffic(String traffic) {
        this.traffic = traffic;
    }

    public String getBook() {
        return book;
    }

    public void setBook(String book) {
        this.book = book;
    }

    public String getVoide() {
        return voide;
    }

    /** Sets the voide field. */
    public void setVoide(String voide) {
        this.voide = voide;
    }

    /** Sets the voide field. Kept under its historical name for existing callers; same as {@link #setVoide}. */
    public void setUrl(String voide) {
        this.voide = voide;
    }

    /**
     * Orders beans field by field (id, time, traffic, book, voide); {@code null} sorts first.
     *
     * @param o the bean to compare against
     * @return negative, zero or positive as this bean sorts before, equal to or after {@code o}
     */
    @Override
    public int compareTo(LogBean o) {
        int result = compareNullable(id, o.id);
        if (result == 0) {
            result = compareNullable(time, o.time);
        }
        if (result == 0) {
            result = compareNullable(traffic, o.traffic);
        }
        if (result == 0) {
            result = compareNullable(book, o.book);
        }
        if (result == 0) {
            result = compareNullable(voide, o.voide);
        }
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof LogBean)) {
            return false;
        }
        LogBean other = (LogBean) obj;
        return Objects.equals(id, other.id)
                && Objects.equals(time, other.time)
                && Objects.equals(traffic, other.traffic)
                && Objects.equals(book, other.book)
                && Objects.equals(voide, other.voide);
    }

    @Override
    public int hashCode() {
        return Objects.hash(id, time, traffic, book, voide);
    }

    /**
     * Serializes the five fields in the order id, time, traffic, book, voide.
     *
     * <p>{@code DataOutput.writeUTF} rejects {@code null}, so unset fields are written as the empty
     * string and will be read back as {@code ""}.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(nullToEmpty(id));
        out.writeUTF(nullToEmpty(time));
        out.writeUTF(nullToEmpty(traffic));
        out.writeUTF(nullToEmpty(book));
        out.writeUTF(nullToEmpty(voide));
    }

    /**
     * Restores the five fields in the same order {@link #write} emitted them.
     *
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readUTF();
        time = in.readUTF();
        traffic = in.readUTF();
        book = in.readUTF();
        voide = in.readUTF();
    }

    /** Null-safe string comparison; {@code null} is ordered before any non-null value. */
    private static int compareNullable(String a, String b) {
        if (a == null) {
            return b == null ? 0 : -1;
        }
        if (b == null) {
            return 1;
        }
        return a.compareTo(b);
    }

    /** Maps {@code null} to the empty string so the value can be passed to {@code writeUTF}. */
    private static String nullToEmpty(String s) {
        return s == null ? "" : s;
    }
}
BaiduLog.java
负责对数据进行处理然后筛选
package org.apache.hadoop.examples;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * MapReduce job over raw log lines.
 *
 * <p>The mapper keeps only lines containing {@link #LOG_MARKER}, extracts five fields from each
 * and emits them keyed by id. The reducer writes, for every id, the number of records seen and the
 * string form of the first record.
 */
public class BaiduLog {

    /** Literal text that identifies the log lines of interest (matched literally, not as a regex). */
    private static final String LOG_MARKER = "(cn.baidu.core.inteceptor.LogInteceptor:55)";

    /** Minimum number of tab-separated fields after the marker: indexes 1..4 are read. */
    private static final int MIN_FIELDS = 5;

    /** Exclusive end index of the time slice taken from the text preceding the marker. */
    private static final int TIME_END = 10;

    /** Parses matching log lines into {@link LogBean}s keyed by the record id. */
    public static class BaiduLogMapper extends Mapper<LongWritable, Text, Text, LogBean> {

        /**
         * Emits {@code (id, LogBean)} for a line that contains the marker and has enough fields;
         * all other lines are skipped silently.
         *
         * @param key     input key supplied by the framework (unused)
         * @param value   one line of the input log
         * @param context sink for the emitted pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String log = value.toString();
            int markerAt = log.indexOf(LOG_MARKER);
            if (markerAt < 0) {
                return;
            }

            // Split around the literal marker. String.split would treat the marker as a regex,
            // where the parentheses form a group and every '.' matches any character.
            String head = log.substring(0, markerAt);
            String tail = log.substring(markerAt + LOG_MARKER.length());

            if (head.length() < TIME_END) {
                return; // too short to carry the time slice
            }
            // NOTE(review): characters 1..9 of the line are used as the time; confirm this slice
            // against the actual log layout.
            String time = head.substring(1, TIME_END);

            String[] fields = tail.split("\t");
            if (fields.length < MIN_FIELDS) {
                return; // malformed record: not enough tab-separated fields
            }
            String id = fields[1];
            String traffic = fields[2];
            String book = fields[3];
            String voide = fields[4];
            // NOTE(review): the previous code re-assigned id from the same field when it equalled
            // "null", which had no effect; a fallback source for the id was probably intended.

            // LogBean's constructor takes traffic BEFORE time.
            LogBean logBean = new LogBean(id, traffic, time, book, voide);
            context.write(new Text(id), logBean);
        }
    }

    /** Counts the records of each id and keeps the string form of the first one. */
    public static class BaiduLogReducer extends Reducer<Text, LogBean, IntWritable, Text> {

        /**
         * Writes {@code (count, firstRecord)} for one id.
         *
         * @param key     the record id
         * @param values  all beans emitted for that id
         * @param context sink for the emitted pair
         */
        @Override
        protected void reduce(Text key, Iterable<LogBean> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            String firstRecord = "";
            for (LogBean logBean : values) {
                if (count == 0) {
                    // Render immediately rather than holding on to the bean reference.
                    firstRecord = logBean.toString();
                }
                count++;
            }
            context.write(new IntWritable(count), new Text(firstRecord));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: BaiduLog <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "avg");
        job.setJarByClass(BaiduLog.class);
        job.setMapperClass(BaiduLog.BaiduLogMapper.class);
        job.setReducerClass(BaiduLog.BaiduLogReducer.class);

        // The mapper and reducer emit different types, so both pairs must be declared.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LogBean.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
但是最后 Hive 数据库没有装好,导致后续的工作受到了限制。
原文地址:https://www.cnblogs.com/yang-qiu/p/11853953.html