#yyds干货盘点# Hadoop序列化详解及代码实操

网友投稿 249 2022-11-22

#yyds干货盘点# Hadoop序列化详解及代码实操

一、Hadoop序列化

1. 序列化概述

(1)编写流量统计的Bean对象

import org.apache.hadoop.io.Writable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; //1 继承Writable接口 public class FlowBean implements Writable { private long upFlow; //上行流量 private long downFlow; //下行流量 private long sumFlow; //总流量 //2 提供无参构造 public FlowBean() { } //3 提供三个参数的getter和setter方法 public long getUpFlow() { return upFlow; } public void setUpFlow(long upFlow) { this.upFlow = upFlow; } public long getDownFlow() { return downFlow; } public void setDownFlow(long downFlow) { this.downFlow = downFlow; } public long getSumFlow() { return sumFlow; } public void setSumFlow(long sumFlow) { this.sumFlow = sumFlow; } public void setSumFlow() { this.sumFlow = this.upFlow + this.downFlow; } //4 实现序列化和反序列化方法,注意顺序一定要保持一致 @Override public void write(DataOutput dataOutput) throws IOException { dataOutput.writeLong(upFlow); dataOutput.writeLong(downFlow); dataOutput.writeLong(sumFlow); } @Override public void readFields(DataInput dataInput) throws IOException { this.upFlow = dataInput.readLong(); this.downFlow = dataInput.readLong(); this.sumFlow = dataInput.readLong(); } //5 重写ToString @Override public String toString() { return upFlow + "\t" + downFlow + "\t" + sumFlow; } }

(2)编写Mapper类

import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class FlowMapper extends Mapper { private Text outK = new Text(); private FlowBean outV = new FlowBean(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { //1 获取一行数据,转成字符串 String line = value.toString(); //2 切割数据 String[] split = line.split("\t"); //3 抓取我们需要的数据:手机号,上行流量,下行流量 String phone = split[1]; String up = split[split.length - 3]; String down = split[split.length - 2]; //4 封装outK outV outK.set(phone); outV.setUpFlow(Long.parseLong(up)); outV.setDownFlow(Long.parseLong(down)); outV.setSumFlow(); //5 写出outK outV context.write(outK, outV); } }

(3)编写Reducer类

import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class FlowReducer extends Reducer { private FlowBean outV = new FlowBean(); @Override protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { long totalUp = 0; long totalDown = 0; //1 遍历values,将其中的上行流量,下行流量分别累加 for (FlowBean flowBean : values) { totalUp += flowBean.getUpFlow(); totalDown += flowBean.getDownFlow(); } //2 封装outKV outV.setUpFlow(totalUp); outV.setDownFlow(totalDown); outV.setSumFlow(); //3 写出outK outV context.write(key,outV); } }

(4)编写Driver驱动类

import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class FlowDriver { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { //1 获取job对象 Configuration conf = new Configuration(); Job job = Job.getInstance(conf); //2 关联本Driver类 job.setJarByClass(FlowDriver.class); //3 关联Mapper和Reducer job.setMapperClass(FlowMapper.class); job.setReducerClass(FlowReducer.class); //4 设置Map端输出KV类型 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(FlowBean.class); //5 设置程序最终输出的KV类型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(FlowBean.class); //6 设置程序的输入输出路径 FileInputFormat.setInputPaths(job, new Path("D:\\inputflow")); FileOutputFormat.setOutputPath(job, new Path("D:\\flowoutput")); //7 提交Job boolean b = job.waitForCompletion(true); System.exit(b ? 0 : 1); } }

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:SpringBoot项目集成xxljob实现全纪录
下一篇:可支持电阻温度检测器的高精度接口
相关文章

 发表评论

暂时没有评论,来抢沙发吧~