MapReduce 中 MapJoin 示例

网友投稿 253 2022-11-25

MapReduce 中 MapJoin 示例

自定义类

package groupby;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;

/**
 * Writable record holding one joined employee/department row.
 *
 * <p>The fields are serialized by {@link #write(DataOutput)} in the fixed order
 * {@code empno, ename, deptno, deptname, flage}, and {@link #readFields(DataInput)}
 * reads them back in exactly the same order.
 */
public class join implements Writable {

    private int empno;
    private String ename;
    private String deptno;
    private String deptname;
    private String flage;

    /** Creates an empty record; fields are filled in later via setters or {@link #readFields}. */
    public join() {
    }

    /**
     * Creates a fully populated record.
     *
     * @param empno    employee number
     * @param ename    employee name
     * @param deptno   department number
     * @param deptname department name
     * @param flage    free-form flag value
     */
    public join(int empno, String ename, String deptno, String deptname, String flage) {
        this.empno = empno;
        this.ename = ename;
        this.deptno = deptno;
        this.deptname = deptname;
        this.flage = flage;
    }

    public int getEmpno() {
        return empno;
    }

    public void setEmpno(int empno) {
        this.empno = empno;
    }

    public String getEname() {
        return ename;
    }

    public void setEname(String ename) {
        this.ename = ename;
    }

    public String getDeptno() {
        return deptno;
    }

    public void setDeptno(String deptno) {
        this.deptno = deptno;
    }

    public String getDeptname() {
        return deptname;
    }

    public void setDeptname(String deptname) {
        this.deptname = deptname;
    }

    public String getFlage() {
        return flage;
    }

    public void setFlage(String flage) {
        this.flage = flage;
    }

    /**
     * Renders the record as tab-separated {@code empno, ename, deptno, deptname}.
     * The {@code flage} field is not part of the text form.
     */
    @Override
    public String toString() {
        return empno + "\t" + ename + "\t" + deptno + "\t" + deptname;
    }

    /**
     * Serializes all five fields.
     *
     * <p>{@link DataOutput#writeUTF(String)} throws {@link NullPointerException} for a
     * null argument, so unset String fields are written as empty strings instead of
     * failing the task.
     *
     * @param out sink to write to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(empno);
        out.writeUTF(nullToEmpty(ename));
        out.writeUTF(nullToEmpty(deptno));
        out.writeUTF(nullToEmpty(deptname));
        out.writeUTF(nullToEmpty(flage));
    }

    /**
     * Restores all five fields in the order they were written by {@link #write(DataOutput)}.
     *
     * @param in source to read from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.empno = in.readInt();
        this.ename = in.readUTF();
        this.deptno = in.readUTF();
        this.deptname = in.readUTF();
        this.flage = in.readUTF();
    }

    /** Returns {@code value}, or the empty string when {@code value} is null. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}

mapjoin 示例代码

package groupby;

import org.apache.commons.lang.StringUtils;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.*;import java.net.URI;import java.net.URISyntaxException;import java.util.HashMap;

import static org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getOutputPath;

public class mapJoin {public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {

String input = "data1/emp.txt" ; String output = "out2" ; final Configuration co = new Configuration() ; //获取 Job 对象 final Job job = Job.getInstance(co); //小表添加到缓存中 job.addCacheFile(new URI("data1/dept.txt")); //设置class job.setJarByClass(groupby.mapreduce.class); //设置mapper 和 Reduce job.setMapperClass(MyMapper.class); // job.setReducerClass(MyReducer.class); //设置 Mapper 阶段输出数据的key 和value job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(join.class); //设置 Reducer 阶段输出数据的key 和value /* job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(join.class);*/ //设置输入和输出路径 FileInputFormat.setInputPaths(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); //删除输出路径中的 文件 Path outDir = getOutputPath(job) ; if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) { File file = new File(outDir.toUri()) ; if(file.isDirectory()){ File[] childrenFiles = file.listFiles(); for (File childFile:childrenFiles){ childFile.delete() ; } } file.delete(); } //提交 job final boolean result = job.waitForCompletion(true); System.exit(result ? 
0 : 1); } /** * 文件中的偏移量,单行文件内容, 分类的key , 存储数据自定义的类 * * */ public static class MyMapper extends Mapper { private HashMap catchFile = new HashMap() ; @Override protected void setup(Context context) throws IOException { String path = context.getCacheFiles()[0].getPath(); BufferedReader read = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8")); String line ; while(StringUtils.isNotEmpty(line = read.readLine())){ String[] sp = line.split("\t") ; catchFile.put(sp[0].trim(),sp[1].trim()) ; } } //int empno,String ename,String deptno,String deptname,String flage @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] info = value.toString().split("\t") ; if(info.length == 8){ context.write(NullWritable.get(),new join(Integer.valueOf(info[0].trim()),info[1].trim(),info[7].trim(),catchFile.getOrDefault(info[7].trim(),""),"")); } } }

}

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:使用MapReducer将文件写入mysql 数据库
下一篇:小疆智控MODBUS转PROFINET网关产品简介
相关文章

 发表评论

暂时没有评论,来抢沙发吧~