7、MapReduce实现自定义排序功能

网友投稿 232 2022-11-26

7、MapReduce实现自定义排序功能

本文测试文本:

tom 20 8000
nancy 22 8000
ketty 22 9000
stone 19 10000
green 19 11000
white 39 29000
socrates 30 40000

import org.apache.hadoop.io.WritableComparable; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; public class Person implements WritableComparable { private String name; private int age; private int salary; public Person() { } public Person(String name, int age, int salary) { //super(); this.name = name; this.age = age; this.salary = salary; } public String getName() { return name; } public void setName(String name) { this.name = name; } public int getAge() { return age; } public void setAge(int age) { this.age = age; } public int getSalary() { return salary; } public void setSalary(int salary) { this.salary = salary; } @Override public String toString() { return this.salary + " " + this.age + " " + this.name; } //先比较salary,高的排序在前;若相同,age小的在前 public int compareTo(Person o) { int compareResult1= this.salary - o.salary; if(compareResult1 != 0) { return -compareResult1; } else { return this.age - o.age; } } //序列化,将NewKey转化成使用流传送的二进制 public void write(DataOutput dataOutput) throws IOException { dataOutput.writeUTF(name); dataOutput.writeInt(age); dataOutput.writeInt(salary); } //使用in读字段的顺序,要与write方法中写的顺序保持一致 public void readFields(DataInput dataInput) throws IOException { //read string this.name = dataInput.readUTF(); this.age = dataInput.readInt(); this.salary = dataInput.readInt(); } }

MapReduce程序:

import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; import java.net.URI; public class SecondarySort { public static void main(String[] args) throws Exception { System.setProperty("HADOOP_USER_NAME","hadoop2.7"); Configuration configuration = new Configuration(); //设置本地运行的mapreduce程序 jar包 configuration.set("mapreduce.job.jar","C:\\Users\\tanglei1\\IdeaProjects\\Hadooptang\\target\\com.kaikeba.hadoop-1.0-SNAPSHOT.jar"); Job job = Job.getInstance(configuration, SecondarySort.class.getSimpleName()); FileSystem fileSystem = FileSystem.get(URI.create(args[1]), configuration); if (fileSystem.exists(new Path(args[1]))) { fileSystem.delete(new Path(args[1]), true); } FileInputFormat.setInputPaths(job, new Path(args[0])); job.setMapperClass(MyMap.class); job.setMapOutputKeyClass(Person.class); job.setMapOutputValueClass(NullWritable.class); //设置reduce的个数 job.setNumReduceTasks(1); job.setReducerClass(MyReduce.class); job.setOutputKeyClass(Person.class); job.setOutputValueClass(NullWritable.class); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); } public static class MyMap extends Mapper { //LongWritable:输入参数键类型,Text:输入参数值类型 //Persion:输出参数键类型,NullWritable:输出参数值类型 @Override //map的输出值是键值对,NullWritable说关心V的值 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { //LongWritable key:输入参数键值对的键,Text value:输入参数键值对的值 //获得一行数据,输入参数的键(距首行的位置),Hadoop读取数据的时候逐行读取文本 //fields:代表着文本一行的的数据 String[] fields = value.toString().split(" "); // 
本列中文本一行数据:nancy 22 8000 String name = fields[0]; //字符串转换成int int age = Integer.parseInt(fields[1]); int salary = Integer.parseInt(fields[2]); //在自定义类中进行比较 Person person = new Person(name, age, salary); context.write(person, NullWritable.get()); } } public static class MyReduce extends Reducer { @Override protected void reduce(Person key, Iterable values, Context context) throws IOException, InterruptedException { context.write(key, NullWritable.get()); } } }

运行结果:

40000 30 socrates
29000 39 white
11000 19 green
10000 19 stone
9000 22 ketty
8000 20 tom
8000 22 nancy

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:python基础知识 01 python模块
下一篇:Modbus通讯,四象限电能三相电能表ACR120E
相关文章

 发表评论

暂时没有评论,来抢沙发吧~