package com.yw.hadoop273;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; /**
* @author YW
* @Date: 2019/9/18 20:58
* @Description: Map stage of the word-count job.
*/
public class WCWordCount extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** Count emitted for every word occurrence; never modified, so one instance is shared. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key, reused across records to avoid allocating a new Text per word. */
    private final Text keyOut = new Text();

    /**
     * Splits one input record into whitespace-separated words and writes
     * {@code (word, 1)} to the context for each of them.
     *
     * @param key     input key of the record (not used by this mapper)
     * @param value   text of the record to tokenize
     * @param context sink that receives the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String word : value.toString().split("\\s+")) {
            // A blank line or leading whitespace produces an empty first token.
            if (word.isEmpty()) {
                continue;
            }
            keyOut.set(word);
            context.write(keyOut, ONE);
        }
    }
}

package com.yw.hadoop273;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; /**
* @author YW
* @Date: 2019/9/18 21:20
* @Description: Reduce stage of the word-count job; sums the counts received for each key.
*/
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Aggregates all counts received for one key and writes a single
     * {@code (key, total)} pair.
     *
     * @param key     the key whose values are being combined
     * @param values  the individual counts emitted for {@code key}
     * @param context sink that receives the aggregated pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable partial : values) {
            total += partial.get();
        }
        IntWritable sum = new IntWritable(total);
        context.write(key, sum);
    }
}
package com.yw.hadoop273;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; /**
* @author YW
* @Date: 2019/9/16 21:20
* @Description: Driver that configures the word-count job.
*/
public class WCApp {

    /**
     * Configures the word-count job, submits it and waits for it to finish.
     *
     * <p>Expects two arguments: {@code args[0]} is the input path and
     * {@code args[1]} is the output path. An existing output path is deleted
     * recursively before the job is submitted. The JVM exits with status 2 when
     * the arguments are missing and with status 1 when the job reports failure.
     *
     * @param args command-line arguments: input path, output path
     * @throws IOException if the file system or the job submission fails, or if
     *                     waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 2) {
            System.err.println("Usage: WCApp <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);

        // Remove any previous output; the job cannot write into an existing directory.
        FileSystem.get(conf).delete(outputPath, true);

        Job job = Job.getInstance(conf);

        // Job properties
        job.setJobName("WCApp");                          // job name
        job.setJarByClass(WCApp.class);                   // class used to locate the job jar
        job.setInputFormatClass(TextInputFormat.class);   // input format
        FileInputFormat.addInputPath(job, inputPath);     // input path
        FileOutputFormat.setOutputPath(job, outputPath);  // output path
        job.setMapperClass(WCWordCount.class);            // mapper class
        job.setReducerClass(WCReducer.class);             // reducer class
        job.setNumReduceTasks(1);                         // number of reducers

        // Key/value types of the map output and of the final output.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        boolean succeeded;
        try {
            succeeded = job.waitForCompletion(true);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and report through the declared exception type.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while waiting for job " + job.getJobName(), e);
        } catch (ClassNotFoundException e) {
            throw new IOException("A class required by job " + job.getJobName() + " was not found", e);
        }
        if (!succeeded) {
            System.exit(1);
        }
    }
}

最新文章

  1. Hibernate 延迟加载原理
  2. 浅谈2D游戏设计模式2- WZ文件详解(UI.WZ)之MapLogin.img(1)
  3. Memcached & Redis使用
  4. C++ essentials 之 static 关键字
  5. Windows Server 2008 R2 创建辅助DNS服务器并接管主要DNS服务器
  6. sass开发过程中遇到的几个坑
  7. 一、HTML和CSS基础--网页布局--实践--固定层效果
  8. Eclipse如何生成jar包
  9. DEBUG模式下屏蔽某些烦人的动态日志信息
  10. ecshop 优化_将商品详情页goods.php重命名为shangpin.php
  11. labview事件结构
  12. POJ 1185 状态压缩DP 炮兵阵地
  13. 英文长单词断行 word-break VS word-wrap
  14. EditTable可编辑的表格
  15. gm8180:arm linux启动加载模块、运行程序
  16. vue UI库iview源码解析(2)
  17. Java设计模式---Strategy策略模式
  18. Asp.Net路由重写为用户名或者ID
  19. Exception in thread "main" java.lang.UnsupportedClassVersionError
  20. js中html拼接

热门文章

  1. arcpy显示指定表的索引属性
  2. elasticsearch 的入门
  3. web-linux-shell实现 阿里方案canvas+wss。
  4. nice -n 10 bash 和 chrt 10 bash 和 echo -17 > /proc/PID/oom_score_adj
  5. 使用注解@Slf4j简化Logger的初始化
  6. openresty开发系列16--lua中的控制结构if-else/repeat/for/while
  7. ISO/IEC 9899:2011 条款6.5.6——加法操作符
  8. 泡泡一分钟:Semi-Dense Visual-Inertial Odometry and Mapping for Quadrotors with SWAP Constraints
  9. 译文:A Robust and Modular Multi-Sensor Fusion Approach Applied to MAV Navigation
  10. Django架站的16堂課