Hadoop 处理日志

127.0.0.1 - - [03/Jul/2014:23:36:38 +0800] "GET /course/detail/3.htm HTTP/1.0" 200 38435 0.038

182.131.89.195 - - [03/Jul/2014:23:37:43 +0800] "GET / HTTP/1.0" 301 - 0.000

127.0.0.1 - - [03/Jul/2014:23:38:27 +0800] "POST /service/notes/addViewTimes_23.htm HTTP/1.0" 200 2 0.003

127.0.0.1 - - [03/Jul/2014:23:39:03 +0800] "GET /html/notes/20140617/779.html HTTP/1.0" 200 69539 0.046

127.0.0.1 - - [03/Jul/2014:23:43:00 +0800] "GET /html/notes/20140318/24.html HTTP/1.0" 200 67171 0.049

127.0.0.1 - - [03/Jul/2014:23:43:59 +0800] "POST /service/notes/addViewTimes_779.htm HTTP/1.0" 200 1 0.003

127.0.0.1 - - [03/Jul/2014:23:45:51 +0800] "GET / HTTP/1.0" 200 70044 0.060

127.0.0.1 - - [03/Jul/2014:23:46:17 +0800] "GET /course/list/73.htm HTTP/1.0" 200 12125 0.010

127.0.0.1 - - [03/Jul/2014:23:46:58 +0800] "GET /html/notes/20140609/542.html HTTP/1.0" 200 94971 0.077

127.0.0.1 - - [03/Jul/2014:23:48:31 +0800] "POST /service/notes/addViewTimes_24.htm HTTP/1.0" 200 2 0.003

127.0.0.1 - - [03/Jul/2014:23:48:34 +0800] "POST /service/notes/addViewTimes_542.htm HTTP/1.0" 200 2 0.003

127.0.0.1 - - [03/Jul/2014:23:49:31 +0800] "GET /notes/index-top-3.htm HTTP/1.0" 200 53494 0.041

127.0.0.1 - - [03/Jul/2014:23:50:55 +0800] "GET /html/notes/20140609/544.html HTTP/1.0" 200 183694 0.076

127.0.0.1 - - [03/Jul/2014:23:53:32 +0800] "POST /service/notes/addViewTimes_544.htm HTTP/1.0" 200 2 0.004

127.0.0.1 - - [03/Jul/2014:23:54:53 +0800] "GET /html/notes/20140620/900.html HTTP/1.0" 200 151770 0.054

127.0.0.1 - - [03/Jul/2014:23:57:42 +0800] "GET /html/notes/20140620/872.html HTTP/1.0" 200 52373 0.034

127.0.0.1 - - [03/Jul/2014:23:58:17 +0800] "POST /service/notes/addViewTimes_900.htm HTTP/1.0" 200 2 0.003

127.0.0.1 - - [03/Jul/2014:23:58:51 +0800] "GET / HTTP/1.0" 200 70044 0.057

public class LogMapper extends Mapper<LongWritable, Text, Text, IntWritable>{

@Override

protected void map(LongWritable key, Text value,Context context)

throws IOException, InterruptedException {

String line = value.toString().trim();

String temp = log(line);

if(temp.length()>0){

context.write(new Text(temp), new IntWritable(1));

}

//处理字符串方法

static String log(String line){

String result = "";

try{

if(line.length()>20){

if(line.indexOf("GET")>0){

result = line.substring(line.indexOf("GET"), line.indexOf("HTTP/1.0")).trim();

}else if(line.indexOf("POST")>0){

result = line.substring(line.indexOf("POST"), line.indexOf("HTTP/1.0")).trim();

}

}catch(Exception e){

System.out.println(line);

}

return result;

}

//测试log方法

public static void main(String[] args) {

String line = "127.0.0.1 - - [03/Jul/2014:23:36:38 +0800] \"GET /course/detail/3.htm HTTP/1.0\" 200 38435 0.038";

System.out.println(log(line));

}

public class LogReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

@Override

protected void reduce(Text key, Iterable<IntWritable> values,Context context)

throws IOException, InterruptedException {

int sum = 0;

for (IntWritable val : values) {

sum += val.get();

}

context.write(key, new IntWritable(sum));

}

public class JobMain {

/**

* @param args

public static void main(String[] args)throws Exception {

Configuration configuration = new Configuration();

Job job = new Job(configuration,"log_job");

job.setJarByClass(JobMain.class);

job.setMapperClass(LogMapper.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(IntWritable.class);

job.setReducerClass(LogReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path(args[0]));

Path path = new Path(args[1]);

FileSystem fs = FileSystem.get(configuration);

if(fs.exists(path)){

fs.delete(path, true);

}

FileOutputFormat.setOutputPath(job, path);

System.exit(job.waitForCompletion(true)?0:1);

}

时间： 2024-11-07 16:16:27

Hadoop 处理日志

hadoop 处理日志的相关文章

Hadoop错误日志

使用Hadoop统计日志数据

Hadoop 之日志管理—应用在 YARN 中运行时的日志

修改hadoop/hdfs日志级别

Hadoop学习日志- install hadoop

hadoop中日志聚集问题

Hadoop日志存放位置

Hadoop日志文件

海量Web日志分析用Hadoop提取KPI统计指标