MR作业编程案例-流量统计

流量统计(统计每个用户的上行流量和下行流量及其流量总和)



源数据:

1363157985066     13726230503    00-FD-07-A4-72-B8:CMCC    120.196.100.82    i02.c.aliimg.com        24    27    2481    24681    200

1363157995052     13826544101    5C-0E-8B-C7-F1-E0:CMCC    120.197.40.4            4    0    264    0    200

1363157991076     13926435656    20-10-7A-28-CC-0A:CMCC    120.196.100.99            2    4    132    1512    200

1363154400022     13926251106    5C-0E-8B-8B-B1-50:CMCC    120.197.40.4            4    0    240    0    200

1363157993044     18211575961    94-71-AC-CD-E6-18:CMCC-EASY    120.196.100.99    iface.qiyi.com    视频网站    15    12    1527    2106    200

1363157995074     84138413    5C-0E-8B-8C-E8-20:7DaysInn    120.197.40.4    122.72.52.12        20    16    4116    1432    200

1363157993055     13560439658    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            18    15    1116    954    200

1363157995033     15920133257    5C-0E-8B-C7-BA-20:CMCC    120.197.40.4    sug.so.360.cn    信息安全    20    20    3156    2936    200

1363157983019     13719199419    68-A1-B7-03-07-B1:CMCC-EASY    120.196.100.82            4    0    240    0    200

1363157984041     13660577991    5C-0E-8B-92-5C-20:CMCC-EASY    120.197.40.4    s19.cnzz.com    站点统计    24    9    6960    690    200

1363157973098     15013685858    5C-0E-8B-C7-F7-90:CMCC    120.197.40.4    rank.ie.sogou.com    搜索引擎    28    27    3659    3538    200

1363157986029     15989002119    E8-99-C4-4E-93-E0:CMCC-EASY    120.196.100.99    www.umeng.com    站点统计    3    3    1938    180    200

1363157992093     13560439658    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            15    9    918    4938    200

1363157986041     13480253104    5C-0E-8B-C7-FC-80:CMCC-EASY    120.197.40.4            3    3    180    180    200

1363157984040     13602846565    5C-0E-8B-8B-B6-00:CMCC    120.197.40.4    2052.flash2-http.qq.com    综合门户    15    12    1938    2910    200

1363157995093     13922314466    00-FD-07-A2-EC-BA:CMCC    120.196.100.82    img.qfc.cn        12    12    3008    3720    200

1363157982040     13502468823    5C-0A-5B-6A-0B-D4:CMCC-EASY    120.196.100.99    y0.ifengimg.com    综合门户    57    102    7335    110349    200

1363157986072     18320173382    84-25-DB-4F-10-1A:CMCC-EASY    120.196.100.99    input.shouji.sogou.com    搜索引擎    21    18    9531    2412    200

1363157990043     13925057413    00-1F-64-E1-E6-9A:CMCC    120.196.100.55    t3.baidu.com    搜索引擎    69    63    11058    48243    200

1363157988072     13760778710    00-FD-07-A4-7B-08:CMCC    120.196.100.82            2    2    120    120    200

1363157985066     13726238888    00-FD-07-A4-72-B8:CMCC    120.196.100.82    i02.c.aliimg.com        24    27    2481    24681    200

1363157993055     13560436666    C4-17-FE-BA-DE-D9:CMCC    120.196.100.99            18    15    1116    954    200

1、第一次作业:

①封装FlowBean

package com.it18zhang.flowdemo;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {

private long upFlow;

private long downFlow;

private long sumFlow;

public FlowBean() {

}

public FlowBean(long upFlow, long downFlow) {

this.upFlow = upFlow;

this.downFlow = downFlow;

this.sumFlow = this.upFlow + this.downFlow;

}

public long getUpFlow() {

return upFlow;

}

public void setUpFlow(long upFlow) {

this.upFlow = upFlow;

}

public long getDownFlow() {

return downFlow;

}

public void setDownFlow(long downFlow) {

this.downFlow = downFlow;

}

public long getSumFlow() {

return sumFlow;

}

@Override

public String toString() {

return upFlow + "\t" + downFlow + "\t" + sumFlow;

}

public void write(DataOutput out) throws IOException {

out.writeLong(upFlow);

out.writeLong(downFlow);

out.writeLong(sumFlow);

}

public void readFields(DataInput in) throws IOException {

upFlow = in.readLong();

downFlow = in.readLong();

sumFlow = in.readLong();

}

public int compareTo(FlowBean o) {

return this.sumFlow - o.getSumFlow() > 0 ? -1 : 1;

}

}

②Mapper

package com.it18zhang.flowdemo;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper of the counting job: turns one tab-separated log line into a
 * (phone number, {@link FlowBean}) pair.
 *
 * <p>The phone number is read from the second column; the two flow values are
 * read relative to the END of the line because the middle columns are
 * optional in the sample data.
 */
public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    /**
     * Fewest columns a usable line can have: with fewer than five, the
     * third-from-last column would overlap the phone-number column.
     */
    private static final int MIN_FIELDS = 5;

    /**
     * Parses one input line and emits the phone number with its flows.
     *
     * @param key     value supplied by the input format; not used here
     * @param value   one tab-separated log line
     * @param context sink for the emitted pair
     * @throws IOException           if writing the output fails
     * @throws InterruptedException  if the task is interrupted while writing
     * @throws NumberFormatException if a flow column is not a valid long
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        String[] splits = value.toString().split("\t");

        // Blank or truncated lines would otherwise fail the whole task with an
        // ArrayIndexOutOfBoundsException; skip them but keep them visible in a counter.
        if (splits.length < MIN_FIELDS) {
            context.getCounter("FlowCount", "MALFORMED_LINES").increment(1);
            return;
        }

        String tel = splits[1];

        // NOTE(review): the second-to-last column is used as upFlow and the
        // third-to-last as downFlow. This looks swapped relative to the usual
        // "...upFlow, downFlow, status" layout - confirm against the log format
        // before changing, since the published results depend on it.
        // Long.parseLong (not Integer.parseInt) so values above 2^31-1 are accepted,
        // matching SortMapper.
        long upFlow = Long.parseLong(splits[splits.length - 2]);
        long downFlow = Long.parseLong(splits[splits.length - 3]);

        context.write(new Text(tel), new FlowBean(upFlow, downFlow));
    }
}

③Reducer

package com.it18zhang.flowdemo;

import java.io.IOException;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer of the counting job: adds up all upstream and downstream flows
 * recorded for one phone number and emits a single {@link FlowBean} with the
 * totals.
 */
public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    /**
     * Sums the flows of every record belonging to {@code key}.
     *
     * @param key     phone number
     * @param values  all flow records emitted for that phone number
     * @param context sink for the aggregated record
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {
        long upFlow = 0;
        long downFlow = 0;

        for (FlowBean value : values) {
            // Accumulate (+=): plain assignment would keep only the last record
            // and lose the traffic of phones that appear on several lines.
            upFlow += value.getUpFlow();
            downFlow += value.getDownFlow();
        }

        context.write(key, new FlowBean(upFlow, downFlow));
    }
}

④App

package com.it18zhang.flowdemo;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver of the counting job: wires {@link FlowCountMapper} and
 * {@link FlowCountReducer} together and runs them over the given paths.
 */
public class FlowCountApp {

    /**
     * Configures, submits and waits for the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: FlowCountApp <input path> <output path>");
            System.exit(2);
        }

        // Create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("FlowCountApp");
        job.setJarByClass(FlowCountApp.class);

        // Mapper settings
        job.setMapperClass(FlowCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // Reducer settings
        job.setReducerClass(FlowCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit and block until the job ends; 0 = success, 1 = failure
        int status = job.waitForCompletion(true) ? 0 : 1;
        System.out.println(status);
        System.out.println("Job Finished");

        // Propagate a failure to the caller; printing alone left the exit code at 0.
        if (status != 0) {
            System.exit(status);
        }
    }
}

结果

13480253104    180    180    360

13502468823    110349    7335    117684

13560436666    954    1116    2070

13560439658    5892    2034    7926

13602846565    2910    1938    4848

13660577991    690    6960    7650

13719199419    0    240    240

13726230503    24681    2481    27162

13726238888    24681    2481    27162

13760778710    120    120    240

13826544101    0    264    264

13922314466    3720    3008    6728

13925057413    48243    11058    59301

13926251106    0    240    240

13926435656    1512    132    1644

15013685858    3538    3659    7197

15920133257    2936    3156    6092

15989002119    180    1938    2118

18211575961    2106    1527    3633

18320173382    2412    9531    11943

84138413    1432    4116    5548

2、第二次作业:

①Mapper

package com.it18zhang.flowdemo;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper of the sorting job: reads one tab-separated line of the form
 * "phone, upFlow, downFlow, ..." and emits the flows as the KEY and the phone
 * number as the value, so that the records are ordered by
 * {@link FlowBean#compareTo(FlowBean)}.
 */
public class SortMapper extends Mapper<LongWritable, Text, FlowBean, Text> {

    /**
     * Converts one input line into a ({@link FlowBean}, phone number) pair.
     *
     * @param key     value supplied by the input format; not used here
     * @param value   one tab-separated line
     * @param context sink for the emitted pair
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        final String[] columns = value.toString().split("\t");

        // Columns 1 and 2 are parsed as the two flows; the bean derives the total itself.
        final FlowBean flows = new FlowBean(Long.parseLong(columns[1]), Long.parseLong(columns[2]));

        context.write(flows, new Text(columns[0]));
    }
}

②Reducer

package com.it18zhang.flowdemo;

import java.io.IOException;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer of the sorting job: swaps key and value back so the output reads
 * "phone number, flows" in the order in which the keys arrive.
 */
public class SortReducer extends Reducer<FlowBean, Text, Text, FlowBean> {

    /**
     * Writes one output record per phone number in the group.
     *
     * @param key     flow record used as the sort key
     * @param values  phone numbers that were grouped under {@code key}
     * @param context sink for the output records
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(FlowBean key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Emit every value, not just the first: if two keys ever compare as
        // equal, their phone numbers share one group and would otherwise be lost.
        for (Text tel : values) {
            context.write(tel, key);
        }
    }
}

③App

package com.it18zhang.flowdemo;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver of the sorting job: wires {@link SortMapper} and {@link SortReducer}
 * together and runs them over the given paths.
 */
public class SortApp {

    /**
     * Configures, submits and waits for the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: SortApp <input path> <output path>");
            System.exit(2);
        }

        // Create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("SortApp");
        job.setJarByClass(SortApp.class);

        // Mapper settings
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(FlowBean.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer settings
        job.setReducerClass(SortReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit and block until the job ends; 0 = success, 1 = failure
        int status = job.waitForCompletion(true) ? 0 : 1;
        System.out.println(status);
        System.out.println("Job Finished");

        // Propagate a failure to the caller; printing alone left the exit code at 0.
        if (status != 0) {
            System.exit(status);
        }
    }
}

结果

13502468823    110349    7335    117684

13925057413    48243    11058    59301

13726238888    24681    2481    27162

13726230503    24681    2481    27162

18320173382    2412    9531    11943

13660577991    690    6960    7650

15013685858    3538    3659    7197

13922314466    3720    3008    6728

15920133257    2936    3156    6092

13560439658    4938    918    5856

84138413    1432    4116    5548

13602846565    2910    1938    4848

18211575961    2106    1527    3633

15989002119    180    1938    2118

13560436666    954    1116    2070

13926435656    1512    132    1644

13480253104    180    180    360

13826544101    0    264    264

13926251106    0    240    240

13760778710    120    120    240

13719199419    0    240    240

时间: 2025-01-18 10:29:57

MR作业编程案例-流量统计的相关文章

Android应用流量统计——NetworkStatsManager使用

在没有Root的情况下,Android应用流量统计在6.0之前一直没有太好的办法,官方虽然提供了TrafficStats,但其主要功能是设备启动以来流量的统计信息,和时间信息无法很好的配合.最近再看TrafficStats类时,发现说明中提到,为获取更具鲁棒性的网络历史数据,建议使用NetworkStatsManager. 本文首先简单对比下TrafficStats和NetworkStatsManager各自的限制和优缺点,然后详细说明NetworkStatsManager的用法,并给出主要代码

网站流量统计系统 phpMyVisites

phpMyVisites是一个网站流量统计系统,它能够提供非常详细的统计报告和高级图形报表.phpMyVisites不是一个Apache log分析工具,它建有自己的log.它的特点包括: 安装部署:提供可视化的安装向导,并且使用简单只要把一段简单的Javascript代码添加到你网站的页面中就能够马上开始收集访问者的信息. 一个简洁,友好的界面可用来展示数据和帮助数据分析. 提供简洁,明了的图形分析报表. 安全:phpMyVisites能够最大限度地阻止入侵和外部攻击. 支持多网站统计 多用户

android app 流量统计

https://blog.csdn.net/yzy9508/article/details/48300265 | android 数据流量统计 - CSDN博客https://blog.csdn.net/forlong401/article/details/8440160 | android如何开发流量监控软件 - CSDN博客https://stackoverflow.com/questions/12613402/android-statistic-3g-traffic-for-each-ap

商超进销存编程案例

商超进销存编程案例:如百货超市.母婴用品店.鞋店.服装店.化妆品店.五金建材.美容店.仓库库存管理等. 1.支持电脑.笔记本.收款机上使用该软件2.商品进货入库.销售商品.库存统计3.库存盘点,可以在营业中进行盘点,商品入库的同时显示库存数量4.财务管理,客户信息.客户往来账,供货商信息.供货商往来账5.会员生日提醒,可以自由查看还有几天过生日的会员6.会员管理,会员充值.积分.刷卡消费7.多种记账方式:如支付宝.微信.签单.现金.银行卡.代金券等8.刷卡消费支持多种混合支付,如会员卡.现金.微

栅格重分类和条件函数均可以实现对流量统计数据进行定义划分

ArcGIS水文分析工具的流向分析是基于D8单流向算法,如果分析使用的DEM存在凹陷点,就会产生汇,导致径流断流从而影响了分析结果.在前面章节《ArcGIS水文分析实战教程(2)ArcGIS水文分析工具的基本原理》中有介绍过D8算法,而《ArcGIS水文分析实战教程(4)地形预处理》章节中笔者也介绍过如何创建无凹陷点的DEM数据,在使用流向分析工具之前可以先行阅读. 首先流向分析要使用填洼过的数据,确保DEM数据没有凹陷点.如果数据准备妥当,直接使用水文分析工具箱中的[流向]工具进

手机卫士10_widget_流量统计_手机杀毒

1._widget入门: 查看帮助文档>>Developrs>>API Guides>>App Widgets 实际上是一个迷你的应用程序VIew视图,嵌入在另外一个应用程序视图. 标准的android和兼容widget的手机才能显示,被修改过的系统是无法显示的. 实现步骤: ①创建类继承APPWidgetProvider//一个方便的帮助类,用来实现一个appwidget 它继承了广播接收者,实现原理也是通过广播实现的,特殊的广播接收者. ②在清单文件里配置广播接收者

linux下如何使用vnstat查看服务器带宽流量统计

因为很多vps或者服务器都是限流量的,但是又很多服务商并没有提供详细的流量表,比如每天的流量表,所以肯定有人很想知道自己服务器到底跑了多少流量. vnstat就是一个很好用的服务器流量统计命令.我截几个图给大家看下就知道了. 统计天数和统计月份的: 是不是很直观呢.下面我们就来看看这个命令怎么用.estimated是预估使用量的意思.最后一列avg. rate是平均使用带宽.rx是接收流量(inbound),tx是发送流量(outbound) 第一步:安装 centos需要先安装epel源后才能

Android中进行流量统计

// ---------------------流量统计-------------------------------- try { PackageManager pm = getPackageManager(); ApplicationInfo ai = pm.getApplicationInfo("com.test.app", PackageManager.GET_ACTIVITIES);// com.test.app为自己应用的包名 Log.d("!!", &

OpenCV编程->RGB直方图统计

我们在处理彩色图像时,特别是在做局部图像的阈值分割时,需要一个直观的RGB统计图. 接下来开始实现. 代码: void CalcHistRGB() { IplImage* img_source; if (img_source = cvLoadImage("101.jpg",1)) { IplImage* RedChannel = cvCreateImage( cvGetSize(img_source), 8, 1); IplImage* GreenChannel = cvCreateI