import org.apache.hadoop.conf.{Configuration, Configured};
import org.apache.hadoop.util.{ToolRunner, Tool};
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.io.{LongWritable, Text, IntWritable};
import org.apache.hadoop.mapreduce.{Reducer, Mapper, Job};
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; /**
* Created with IntelliJ IDEA.
* User: riley
* Date: 8/26/13
* Time: 1:58 PM
*/
/**
 * Hadoop MapReduce word-count job, launched through `ToolRunner`.
 *
 * Expects two arguments (after any generic Hadoop options handled by
 * `ToolRunner`): the input path and the output path.
 */
object WordCount extends Configured with Tool {

  /** Mapper that emits `(token, 1)` for every non-empty, space-separated token of a line. */
  class Map extends Mapper[LongWritable, Text, Text, IntWritable] {
    private val one = new IntWritable(1)
    // Reused output key. It has to be initialised here: a bare `var word: Text`
    // is an abstract declaration and would leave nothing to call `set` on.
    private val word = new Text()

    /**
     * Splits one input line on single spaces and writes each token with a count of 1.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param rowLine the line of text to tokenize
     * @param context sink for the emitted `(word, 1)` pairs
     */
    override def map(
        key: LongWritable,
        rowLine: Text,
        context: Mapper[LongWritable, Text, Text, IntWritable]#Context
    ): Unit = {
      // split(" ") yields "" for an empty line and for leading/repeated spaces;
      // those are skipped so that the empty string is never counted as a word.
      for (item <- rowLine.toString.split(" ") if item.nonEmpty) {
        word.set(item)
        context.write(word, one)
      }
    }
  }

  /** Reducer (also registered as the combiner in `run`) that sums the counts of a key. */
  class Reduce extends Reducer[Text, IntWritable, Text, IntWritable] {
    // Reused output value.
    private val count = new IntWritable()

    /**
     * Adds up all values received for `key` and writes `(key, total)`.
     *
     * @param key     the word being counted
     * @param values  partial counts for that word; declared as `java.lang.Iterable`
     *                so that this method really overrides `Reducer.reduce`
     * @param context sink for the emitted `(word, total)` pair
     */
    override def reduce(
        key: Text,
        values: java.lang.Iterable[IntWritable],
        context: Reducer[Text, IntWritable, Text, IntWritable]#Context
    ): Unit = {
      // Plain iterator loop: needs no Java/Scala collection converters, whose
      // import path differs between Scala versions.
      var sum = 0
      val it = values.iterator()
      while (it.hasNext) sum += it.next().get()
      count.set(sum)
      context.write(key, count)
    }
  }

  /**
   * Configures and runs the job, blocking until it completes.
   *
   * @param args `args(0)` is the input path, `args(1)` the output path
   * @return 0 if the job succeeded, 1 if it failed, 2 if arguments are missing
   */
  override def run(args: Array[String]): Int = {
    if (args.length < 2) {
      System.err.println("Usage: WordCount <input path> <output path>")
      2
    } else {
      // NOTE(review): newer Hadoop releases prefer Job.getInstance(conf, name);
      // the constructor is kept to stay compatible with the Hadoop version in use.
      val job = new Job(getConf(), "WordCount")
      job.setJarByClass(this.getClass)

      job.setOutputKeyClass(classOf[Text])
      job.setOutputValueClass(classOf[IntWritable])

      job.setMapperClass(classOf[Map])
      job.setReducerClass(classOf[Reduce])
      job.setCombinerClass(classOf[Reduce])

      FileInputFormat.addInputPath(job, new Path(args(0)))
      FileOutputFormat.setOutputPath(job, new Path(args(1)))

      if (job.waitForCompletion(true)) 0 else 1
    }
  }

  /** Program entry point: runs the job through `ToolRunner` and exits with its status code. */
  def main(args: Array[String]): Unit = {
    System.exit(ToolRunner.run(new Configuration(), this, args))
  }
}

最新文章

  1. ddd 聚合根 之 聚合与不聚合 设计
  2. 【ASP.NET 问题】IIS发布网站后出现 "处理程序“PageHandlerFactory-Integrated”在其模块列表中有一个错误"的解决办法
  3. 解决Oracle在scott用户下创建视图(VIEW)权限不足的方法
  4. NULL指针、零指针、野指针
  5. html的转码与反转码
  6. C#世界中的委托
  7. 【Android接口实现】ActionBar利用整理的一些细节
  8. 矢量量化(VQ)
  9. RESTful 的总结
  10. [Swift]LeetCode144. 二叉树的前序遍历 | Binary Tree Preorder Traversal
  11. imageio.ffmpeg.download() has been deprecated. Use 'pip install imageio-ffmpeg' instead.'
  12. canal入门Demo
  13. 学 Win32 汇编[33] - 探讨 Win32 汇编的模块化编程
  14. 配置tomcat通过客户端访问
  15. 集合框架—常见的Set集合
  16. jquery源码解析:jQuery数据缓存机制详解2
  17. manjaro 添加tash 快捷方式
  18. angular路由传参和获取路由参数的方法
  19. 基于scrapy的分布式爬虫抓取新浪微博个人信息和微博内容存入MySQL
  20. Django的restful api自动生成工具django-rest-swagger介绍

热门文章

  1. js中怎么去掉数组的空值
  2. Navicat工具导出mySQL数据库某个视图结构的.sql脚本
  3. jquery 给新增的addClass 使用css样式
  4. h5移动端flexible源码适配终端解读以及常用sass函数
  5. $x \rightarrow \infty$时多项式型函数的极限
  6. JavaFX 之自定义窗口标题栏(二)
  7. 骰子点数概率__dp
  8. Vmvare + Ubuntu 16.04环境搭建 + 相关软件安装配置笔记【深度学习】
  9. 了解IHttpModule接口事件执行顺便 获取Session
  10. mysql字符集和校对规则(Mysql校对集)