Hadoop Mapreduce 案例统计手机流量使用情况 / 憋错料

需要被统计流量的文件内容如下：

1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200
1363157995052 13826544101 5C-0E-8B-C7-F1-E0:CMCC 120.197.40.4 4 0 264 0 200
1363157991076 13926435656 20-10-7A-28-CC-0A:CMCC 120.196.100.99 2 4 132 1512 200
1363154400022 13926251106 5C-0E-8B-8B-B1-50:CMCC 120.197.40.4 4 0 240 0 200
1363157993044 18211575961 94-71-AC-CD-E6-18:CMCC-EASY 120.196.100.99 iface.qiyi.com 视频网站 15 12 1527 2106 200
1363157995074 84138413 5C-0E-8B-8C-E8-20:7DaysInn 120.197.40.4 122.72.52.12 20 16 4116 1432 200
1363157993055 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 18 15 1116 954 200
1363157995033 15920133257 5C-0E-8B-C7-BA-20:CMCC 120.197.40.4 sug.so.360.cn 信息安全 20 20 3156 2936 200
1363157983019 13719199419 68-A1-B7-03-07-B1:CMCC-EASY 120.196.100.82 4 0 240 0 200
1363157984041 13660577991 5C-0E-8B-92-5C-20:CMCC-EASY 120.197.40.4 s19.cnzz.com 站点统计 24 9 6960 690 200
1363157973098 15013685858 5C-0E-8B-C7-F7-90:CMCC 120.197.40.4 rank.ie.sogou.com 搜索引擎 28 27 3659 3538 200
1363157986029 15989002119 E8-99-C4-4E-93-E0:CMCC-EASY 120.196.100.99 www.umeng.com 站点统计 3 3 1938 180 200
1363157992093 13560439658 C4-17-FE-BA-DE-D9:CMCC 120.196.100.99 15 9 918 4938 200
1363157986041 13480253104 5C-0E-8B-C7-FC-80:CMCC-EASY 120.197.40.4 3 3 180 180 200
1363157984040 13602846565 5C-0E-8B-8B-B6-00:CMCC 120.197.40.4 2052.flash2-http.qq.com 综合门户 15 12 1938 2910 200
1363157995093 13922314466 00-FD-07-A2-EC-BA:CMCC 120.196.100.82 img.qfc.cn 12 12 3008 3720 200
1363157982040 13502468823 5C-0A-5B-6A-0B-D4:CMCC-EASY 120.196.100.99 y0.ifengimg.com 综合门户 57 102 7335 110349 200
1363157986072 18320173382 84-25-DB-4F-10-1A:CMCC-EASY 120.196.100.99 input.shouji.sogou.com 搜索引擎 21 18 9531 2412 200
1363157990043 13925057413 00-1F-64-E1-E6-9A:CMCC 120.196.100.55 t3.baidu.com 搜索引擎 69 63 11058 48243 200
1363157988072 13760778710 00-FD-07-A4-7B-08:CMCC 120.196.100.82 2 2 120 120 200
1363157985066 13726238888 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200

其中各个字段的解释如下，要统计手机号的上行流量，下行流量和总流量，其中总流量=上行流量+下行流量

代码如下：

FlowBean:

package com.gec.demo.bean;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/*
* 序列化的类
* */
public class FlowBean implements Writable
{
    //上行流量
    private long upFlow;
    //下行流量
    private long downFlow;
    //总流量
    private long sumFlow;

    public FlowBean() {

    }

    public FlowBean(long upFlow, long downFlow, long sumFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = sumFlow;
    }

    public void setFlowData(long upFlow, long downFlow)
    {
        this.upFlow=upFlow;
        this.downFlow=downFlow;
        sumFlow=this.upFlow+this.downFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    //序列化处理
    @Override
    public void write(DataOutput out) throws IOException {

        out.writeLong(this.getUpFlow());
        out.writeLong(this.getDownFlow());
        out.writeLong(this.getSumFlow());
    }

    //反列化处理
    @Override
    public void readFields(DataInput in) throws IOException {

        setUpFlow(in.readLong());
        setDownFlow(in.readLong());
        setSumFlow(in.readLong());
    }

    @Override
    public String toString() {
        return getUpFlow()+"\t"+getDownFlow()+"\t"+getSumFlow();
    }
}

Mapper:

package com.gec.demo;

import com.gec.demo.bean.FlowBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;

/*<
    KEYIN
    VALUEIN
    KEYOUT
    VALUEOUT
    */
public class PhoneFlowMapper extends Mapper<LongWritable, Text,Text, FlowBean>
{
    private FlowBean flowBean=new FlowBean();
    private Text keyText=new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        //获取行内容
        String line=value.toString();

        String []fields=StringUtils.split(line,‘\t‘);

        //获取手机号
        String phoneNum=fields[1];

        //获取上传流量数据
        long upflow=Long.parseLong(fields[fields.length-3]);
        //获取下载流量数据
        long downflow=Long.parseLong(fields[fields.length-2]);

        flowBean.setFlowData(upflow,downflow);
        keyText.set(phoneNum);

        context.write(keyText,flowBean);

    }
}

Reducer:

package com.gec.demo;

import com.gec.demo.bean.FlowBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class PhoneFlowReducer extends Reducer<Text, FlowBean,Text, FlowBean>
{
    private FlowBean flowBean=new FlowBean();

    /**
     *key:phonenum（电话号码）
     *values：
     */
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {

        long sumDownFlow=0;
        long sumUpFlow=0;

        //统计每台手机所耗的总流量
        for (FlowBean value : values) {

            sumUpFlow+=value.getUpFlow();
            sumDownFlow+=value.getDownFlow();
        }

        flowBean.setFlowData(sumUpFlow,sumDownFlow);

        context.write(key,flowBean);

    }
}

Driver:

package com.gec.demo;

import com.gec.demo.bean.FlowBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class PhoneFlowApp {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration configuration=new Configuration();

        Job job=Job.getInstance(configuration);

        job.setJarByClass(PhoneFlowApp.class);
        //job.setJar("");

        job.setMapperClass(PhoneFlowMapper.class);
        job.setReducerClass(PhoneFlowReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        //指定要处理的数据所在的位置
        FileInputFormat.setInputPaths(job, "hdfs://hadoop-001:9000/flowcount/input/HTTP_20130313143750.dat");
        //指定处理完成之后的结果所保存的位置
        FileOutputFormat.setOutputPath(job, new Path("hdfs://hadoop-001:9000/flowcount/output/"));
        //向yarn集群提交这个job
        boolean res = job.waitForCompletion(true);
        System.exit(res?0:1);

    }
}

生成文件的结果如下：

手机号上行流量下行流量总流量

如果要按总流量的多少排序，并按手机号输出到六个不同的文件，有如下代码：

FlowBean：要实现WritableComparable接口

package com.gec.demo.Bean;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements WritableComparable<FlowBean> {
    //上行流量
    private  long upFlow;
    //下行流量
    private long downFlow;
    //总流量
    private long sumFlow;

    public FlowBean() {

    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    public FlowBean(long upFlow, long downFlow, long sumFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = sumFlow;
    }
    public void setFlowData(long upFlow,long downFlow){
        this.upFlow=upFlow;
        this.downFlow=downFlow;
        this.sumFlow=this.upFlow+this.downFlow;
    }

    @Override
    public String toString() {
        return getUpFlow()+" "+getDownFlow()+" "+getSumFlow();
    }

    //序列化处理
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(this.getUpFlow());
        dataOutput.writeLong(this.getDownFlow());
        dataOutput.writeLong(this.getSumFlow());

    }
    //反序列化处理
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        setUpFlow(dataInput.readLong());
        setDownFlow(dataInput.readLong());
        setSumFlow(dataInput.readLong());
    }

    @Override
    public int compareTo(FlowBean o) {
        if (o.getSumFlow()>this.getSumFlow()){
            return 1;
        }else
            return -1;
    }
}

//FlowPartitioner类继承Partitioner类，可以定义以什么开头的手机号输出到哪个文件

package com.gec.demo.partitioner;

import com.gec.demo.Bean.FlowBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class FlowPartitioner extends Partitioner<FlowBean,Text> {
    @Override
    public int getPartition(FlowBean flowBean, Text text, int i) {
        String phoneNum=text.toString();
        String headThreePhoneNum=phoneNum.substring(0,3);
        if(headThreePhoneNum.equals("134")){
            return 0;
        }else if(headThreePhoneNum.equals("135")){
            return 1;
        }else if(headThreePhoneNum.equals("136")){
            return 2;
        }else if(headThreePhoneNum.equals("137")){
            return 3;
        }else if(headThreePhoneNum.equals("138")){
            return 4;
        }else{
            return  5;
        }
    }
}

package com.gec.demo;

import com.gec.demo.Bean.FlowBean;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
import org.mortbay.util.StringUtil;

import java.io.IOException;

public class PhoneFlowMapper extends Mapper<LongWritable, Text,FlowBean,Text> {
    private  FlowBean flowBean=new FlowBean();
    private  Text text=new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line=value.toString();
        String[] fields= StringUtils.split(line,‘\t‘);
        String phoneNum=fields[1];
       long upFlow=Long.parseLong(fields[fields.length-3]);
       long downFlow=Long.parseLong(fields[fields.length-2]);
       flowBean.setFlowData(upFlow,downFlow);
       flowBean.setSumFlow(upFlow+downFlow);
       text.set(phoneNum);
       context.write(flowBean,text);
        }

    }

package com.gec.demo;

import com.gec.demo.Bean.FlowBean;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class PhoneFlowReducer extends Reducer<FlowBean,Text,Text,FlowBean> {
    private  FlowBean flowBean=new FlowBean();

    @Override
    protected void reduce(FlowBean key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        //有没有分组合并操作？

       context.write(values.iterator().next(),key);
    }
}

package com.gec.demo;

import com.gec.demo.Bean.FlowBean;
import com.gec.demo.partitioner.FlowPartitioner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class PhoneFlowApp  {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration=new Configuration();
        Job job=Job.getInstance(configuration);
        job.setJarByClass(PhoneFlowApp.class);
        job.setMapperClass(PhoneFlowMapper.class);
        job.setReducerClass(PhoneFlowReducer.class);
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setPartitionerClass(FlowPartitioner.class);
        job.setNumReduceTasks(6);
        //指定要处理的数据所在的位置
        FileInputFormat.setInputPaths(job, "D://Bigdata//4、mapreduce//day02//HTTP_20130313143750.dat");
        //指定处理完成之后的结果所保存的位置
        FileOutputFormat.setOutputPath(job, new Path("D://Bigdata//4、mapreduce//day02//output"));
        //向yarn集群提交这个job
        boolean res = job.waitForCompletion(true);
        System.exit(res?0:1);

    }
}

原文地址：https://www.cnblogs.com/Transkai/p/10485257.html

时间： 2024-11-05 20:30:56

Hadoop Mapreduce 案例统计手机流量使用情况

需要被统计流量的文件内容如下：

Hadoop Mapreduce 案例统计手机流量使用情况的相关文章

统计手机流量

android获取手机流量使用情况

Hadoop MapReduce编程 API入门系列之网页流量版本1（二十一）

Hadoop MapReduce编程 API入门系列之网页流量版本1（二十二）

精品软件推荐流量报表软件 networx 统计电脑网络用的流量用手机流量卡的特别有用

Hadoop MapReduce编程入门案例

MapReduce 单词统计案例编程

第2节 mapreduce深入学习：8、手机流量汇总求和

Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）

Hadoop Mapreduce 案例 统计手机流量使用情况

需要被统计流量的文件内容如下：

Hadoop Mapreduce 案例 统计手机流量使用情况的相关文章

Hadoop Mapreduce 案例统计手机流量使用情况

Hadoop Mapreduce 案例统计手机流量使用情况的相关文章