本篇文章小编给大家分享一下使用hadoop来提取文件中的指定内容代码示例,文章代码介绍的很详细,小编觉得挺不错的,现在分享给大家供大家参考,有需要的小伙伴们可以来看看。
一、需求
把以下txt中含“baidu”字符串的链接输出到一个文件,否则输出到另外一个文件。
二、步骤
1.LogMapper.java
package com.whj.mapreduce.outputformat; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class LogMapper extends Mapper{ @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // 不做任何处理 context.write(value,NullWritable.get()); } }
2.LogReducer.java
package com.whj.mapreduce.outputformat; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class LogReducer extends Reducer{ @Override protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { for (NullWritable value : values) { context.write(key,NullWritable.get()); } } }
3.LogOutputFormat.java
package com.whj.mapreduce.outputformat; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class LogOutputFormat extends FileOutputFormat{ @Override public RecordWriter getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException { LogRecordWriter lrw = new LogRecordWriter(job); return lrw; } }
4.LogRecordWriter.java
package com.whj.mapreduce.outputformat; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import java.io.IOException; public class LogRecordWriter extends RecordWriter{ private FSDataOutputStream baiduOut;//ctrl+alt+f private FSDataOutputStream otherOut; public LogRecordWriter(TaskAttemptContext job) throws IOException { //创建两条流 FileSystem fs = FileSystem.get(job.getConfiguration()); baiduOut = fs.create(new Path("D:tempoutputformat.log")); otherOut = fs.create(new Path("D:tempother.log")); } @Override public void write(Text key, NullWritable nullWritable) throws IOException, InterruptedException { // 具体写 String log = key.toString(); if(log.contains("baidu")){ baiduOut.writeBytes(log+"n"); }else{ otherOut.writeBytes(log+"n"); } } @Override public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { //关流 IOUtils.closeStream(baiduOut); IOUtils.closeStream(otherOut); } }
5.LogDriver.java
package com.whj.mapreduce.outputformat; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class LogDriver { public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf); job.setJarByClass(LogDriver.class); job.setMapperClass(LogMapper.class); job.setReducerClass(LogReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); //设置自定义的 outputformat job.setOutputFormatClass(LogOutputFormat.class); FileInputFormat.setInputPaths(job, new Path("D:input")); // 虽 然 我 们 自 定 义 了 outputformat , 但 是 因 为 我 们 的 outputformat 继承自fileoutputformat //而 fileoutputformat 要输出一个_SUCCESS 文件,所以在这还得指定一个输出目录 FileOutputFormat.setOutputPath(job, new Path("D:templogoutput")); boolean b = job.waitForCompletion(true); System.exit(b ? 0 : 1); } }
三、结果
茶杯头甜蜜终章dlc 官方手机版v1.0.0.3
下载火柴人传说暗影格斗内置菜单 最新版v3.0.1
下载荒野乱斗测试服 安卓版v61.10.3
下载荒野乱斗彩虹服 安卓版v61.10.3
下载寒霜启示录 安卓版v1.25.10
寒霜启示录是一款生存模拟游戏,不少玩家可能对于末日都有着自己
末日城堡免广告版 安卓最新版v0.7.1
末日城堡免广告版是一款非常好玩的模拟经营类游戏,内部可以不看
甜蜜人生模拟器 最新版v1.4.5
甜蜜人生模拟器是一款非常好玩的模拟恋爱手游,玩家在这里能够对
武器锻造师内置功能菜单 v10.4
武器锻造师内置菜单版是游戏的破解版本,在该版本中为玩家提供了
开放空间overfield 安卓版v1.0.5
开放空间Overfield是一款箱庭养成经营手游,让你在广阔