package com.gylhaut.hadoop.senior.mapreduce;

import java.io.IOException;
import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; /**
* Shift +Alt +S 快捷键用法
*
*/
/**
 * Classic Hadoop MapReduce word-count job: tokenizes each line of the input
 * and emits the total number of occurrences of every word.
 *
 * <p>Usage: {@code WordCount <input path> <output path>}
 */
public class WordCount {

    /**
     * Step 1: Map phase. Splits each input line (the value) on whitespace and
     * emits (word, 1) for every token.
     */
    public static class WordCountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        // Reused across map() calls to avoid allocating per record.
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    /**
     * Step 2: Reduce phase. Sums the counts emitted for each word and writes
     * (word, total). Also usable as a combiner, since summing is associative
     * and commutative and the input/output types match.
     */
    public static class WordCountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        // Reused across reduce() calls to avoid allocating per key.
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    /**
     * Step 3: Driver. Configures the job (input -> map -> combine -> reduce
     * -> output) and submits it, blocking until completion.
     *
     * @param args args[0] = input path, args[1] = output path
     * @return 0 on success, 1 on failure or bad arguments
     * @throws Exception if job submission or execution fails
     */
    public int run(String[] args) throws Exception {
        // Fail fast with a usage message rather than an
        // ArrayIndexOutOfBoundsException on args[0]/args[1].
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            return 1;
        }
        // 1. get configuration
        Configuration configuration = new Configuration();
        // 2. create job, named after this class
        Job job = Job.getInstance(configuration, this.getClass()
                .getSimpleName());
        // ship the jar containing this class to the cluster
        job.setJarByClass(this.getClass());
        // 3. set job: input -> map -> reduce -> output
        // 3.1 input
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);
        // 3.2 map, with the map-side output key/value types
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Combiner: pre-aggregate counts on the map side to cut shuffle
        // traffic; safe here because summing is associative/commutative and
        // the reducer's input and output types are identical.
        job.setCombinerClass(WordCountReducer.class);
        // 3.3 reduce, with the job's final output key/value types
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 3.4 output (must not already exist, per Hadoop convention)
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);
        // 4. submit job and wait; true = report progress to the console
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int status = new WordCount().run(args);
        System.exit(status);
    }
}

  

最新文章

  1. JavaScript脚本语言基础(一)
  2. Web前端年后跳槽必看的各种面试题
  3. sql server转移tempdb数据库的物理存放位置
  4. Translation preface: <<Professional JavaScript for Web Developers, 3rd Edition>>
  5. Spring Aop实现方式总结
  6. hdu 4107当卡段树
  7. boxfilter 实现
  8. Linux Yum仓库介绍及服务端及客户端配置
  9. java集合框架的讲解
  10. Codeforces Round #301 (Div. 2)(A,【模拟】B,【贪心构造】C,【DFS】)
  11. Ecto中的changeset,schema,struct,map
  12. 搭建项目(Vue学习笔记一)
  13. Codeforces Round #532 (Div. 2) F 线性基(新坑) + 贪心 + 离线处理
  14. 关于激活Windows10专业版2018长期服务版
  15. SQL SERVER 一组数据按规律横着放置,少则补空,如人员按一进一出的规律,进出为一组,缺少的补null
  16. PHP 中如何创建和修改数组?
  17. [咸恩静][Real Love]
  18. dp入门:最长不下降序列
  19. 无法读取用户配置文件,系统自动建立Temp临时用户
  20. svn更新代码时控制台出现的英文字母表示什么意思

热门文章

  1. LeetCode随缘刷题之回文数
  2. 06 jQuery
  3. mysqlCRUD
  4. 【Azure 应用服务】部署Jar到App Service for Linux,因启动命令路径配置错误而引起:( Application Error 问题
  5. 《深度探索C++对象模型》第一章 | 关于对象
  6. 深入分析CVE-2021-4034及漏洞复现
  7. ACM对抗赛有感
  8. const 和指针之间的姻缘
  9. 新建一个scrapy项目
  10. Docker从入门到精通