Hadoop 求单词count数
2024-08-27 01:49:38
package com.yw.hadoop273; import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; /**
* @author YW
* @Date: 2019/9/18 20:58
* @Description:
*/
public class WCWordCount extends Mapper<LongWritable, Text, Text, IntWritable> {
    /*
     * Mapper stage of the word count: splits each input line into words
     * and emits the pair (word, 1) for every word found.
     */

    /** Constant count of one that is attached to every emitted word. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key object, reused across calls instead of allocating one per record. */
    private final Text keyOut = new Text();

    /**
     * Splits one input line into whitespace-separated words and writes
     * each word to the context with a count of 1.
     *
     * @param key     input key of the record; not used by this mapper
     * @param value   the text of the input record
     * @param context receives the emitted (word, 1) pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split on runs of whitespace so repeated spaces or tabs do not
        // produce empty "words". (The previous split("") produced one
        // token per character rather than one per word.)
        for (String word : value.toString().split("\\s+")) {
            // A line that starts with whitespace (or is empty) yields an
            // empty first token; it is not a word, so skip it.
            if (word.isEmpty()) {
                continue;
            }
            keyOut.set(word);
            context.write(keyOut, ONE);
        }
    }
}
package com.yw.hadoop273; import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; /**
* @author YW
* @Date: 2019/9/18 21:20
* @Description:
*/
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Aggregation step: adds up every count received for {@code key} and
     * writes the key together with the resulting total.
     *
     * @param key     the key whose counts are being aggregated
     * @param values  the counts received for that key
     * @param context receives the (key, total) pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        final IntWritable total = new IntWritable(sumOf(values));
        context.write(key, total);
    }

    /** Returns the sum of all the given counts (0 when there are none). */
    private static int sumOf(Iterable<IntWritable> counts) {
        int sum = 0;
        for (IntWritable current : counts) {
            sum += current.get();
        }
        return sum;
    }
}
package com.yw.hadoop273; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; /**
* @author YW
* @Date: 2019/9/16 21:20
* @Description:
*/
public class WCApp {

    /**
     * Configures the word-count job, submits it and waits for it to finish.
     *
     * @param args {@code args[0]} is the input path and {@code args[1]} is the
     *             output path; an existing output path is deleted first
     * @throws IllegalArgumentException if fewer than two arguments are given
     * @throws IOException              if file-system access fails, or if the job
     *                                  is interrupted, cannot be loaded, or fails
     */
    public static void main(String[] args) throws IOException {
        // Both paths are required below; fail early with a usage message
        // instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            throw new IllegalArgumentException("Usage: WCApp <input path> <output path>");
        }
        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);

        Configuration conf = new Configuration();

        // Delete an already existing output directory (recursively).
        FileSystem.get(conf).delete(outputPath, true);

        Job job = Job.getInstance(conf);

        // Job properties
        job.setJobName("WCApp");                         // job name
        job.setJarByClass(WCApp.class);                  // class used to locate the job jar
        job.setInputFormatClass(TextInputFormat.class);  // input format

        FileInputFormat.addInputPath(job, inputPath);    // input path
        FileOutputFormat.setOutputPath(job, outputPath); // output path

        job.setMapperClass(WCWordCount.class);           // mapper class
        job.setReducerClass(WCReducer.class);            // reducer class
        job.setNumReduceTasks(1);                        // number of reducers

        // Key and value types must be set separately; setting the key class
        // twice would leave the value class unset.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Submit the job and block until it finishes. The extra checked
        // exceptions are wrapped so the declared throws clause stays the same.
        boolean succeeded;
        try {
            succeeded = job.waitForCompletion(true);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IOException("WCApp job was interrupted", e);
        } catch (ClassNotFoundException e) {
            throw new IOException("WCApp job classes could not be loaded", e);
        }
        if (!succeeded) {
            throw new IOException("WCApp job failed");
        }
    }
}
最新文章
- Hibernate 延迟加载原理
- 浅谈2D游戏设计模式2- WZ文件详解(UI.WZ)之MapLogin.img(1)
- Memcached & Redis使用
- C++ essentials 之 static 关键字
- Windows Server 2008 R2 创建辅助DNS服务器并接管主要DNS服务器
- sass开发过程中遇到的几个坑
- 一、HTML和CSS基础--网页布局--实践--固定层效果
- Eclipse如何生成jar包
- DEBUG模式下屏蔽某些烦人的动态日志信息
- ecshop 优化_将商品详情页goods.php重命名为shangpin.php
- labview事件结构
- POJ 1185 状态压缩DP 炮兵阵地
- 英文长单词断行 word-break VS word-wrap
- EditTable可编辑的表格
- gm8180:arm linux启动加载模块、运行程序
- vue UI库iview源码解析(2)
- Java设计模式---Strategy策略模式
- Asp.Net路由重写为用户名或者ID
- Exception in thread "main" java.lang.UnsupportedClassVersionError
- js中html拼接
热门文章
- arcpy显示指定表的索引属性
- elasticsearch 的入门
- web-linux-shell实现 阿里方案canvas+wss。
- nice -n 10 bash 和 chrt 10 bash 和 echo -17 > /proc/PID/oom_score_adj
- 使用注解@Slf4j简化Logger的初始化
- openresty开发系列16--lua中的控制结构if-else/repeat/for/while
- ISO/IEC 9899:2011 条款6.5.6——加法操作符
- 泡泡一分钟:Semi-Dense Visual-Inertial Odometry and Mapping for Quadrotors with SWAP Constraints
- 译文:A Robust and Modular Multi-Sensor Fusion ApproachApplied to MAV Navigation
- Django架站的16堂課