Scala开发Hadoop示例
2024-10-15 02:33:53
import org.apache.hadoop.conf.{Configuration, Configured};
import org.apache.hadoop.util.{ToolRunner, Tool};
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.io.{LongWritable, Text, IntWritable};
import org.apache.hadoop.mapreduce.{Reducer, Mapper, Job};
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; /**
* Created with IntelliJ IDEA.
* User: riley
* Date: 8/26/13
* Time: 1:58 PM
*/
// Word-count MapReduce job, launched through Hadoop's ToolRunner so that the
// generic Hadoop command-line options are parsed before `run` is invoked.
//
// Usage: WordCount <input path> <output path>
object WordCount extends Configured with Tool {

  /**
   * Mapper: for each input line, emits `(token, 1)` for every
   * whitespace-separated, non-empty token.
   */
  class Map extends Mapper[LongWritable, Text, Text, IntWritable] {
    // Constant output value, reused for every emitted pair.
    private val one = new IntWritable(1)

    // Reusable output key. It must be initialised here: a `var` declared
    // without a value is an abstract member, which does not compile in a
    // concrete class and would otherwise be null when `set` is called.
    private val word = new Text()

    /**
     * @param key     input key supplied by the input format (unused)
     * @param rowLine one line of input text
     * @param context sink for the emitted `(word, 1)` pairs
     */
    override def map(
        key: LongWritable,
        rowLine: Text,
        context: Mapper[LongWritable, Text, Text, IntWritable]#Context): Unit = {
      // Split on runs of whitespace and skip empty tokens so that blank lines,
      // leading/trailing blanks, tabs and repeated spaces never produce an
      // empty-string "word".
      for (token <- rowLine.toString.split("\\s+") if token.nonEmpty) {
        word.set(token)
        context.write(word, one)
      }
    }
  }

  /**
   * Reducer (also registered as the combiner in `run`): sums the counts
   * received for a key and emits `(key, sum)`.
   */
  class Reduce extends Reducer[Text, IntWritable, Text, IntWritable] {
    // Reusable output value.
    private val count = new IntWritable()

    /**
     * @param key     the word being counted
     * @param values  partial counts for `key`
     * @param context sink for the emitted `(key, total)` pair
     */
    override def reduce(
        key: Text,
        // Must be java.lang.Iterable: an unqualified `Iterable` resolves to
        // scala.Iterable, in which case this method would not override
        // Reducer.reduce.
        values: java.lang.Iterable[IntWritable],
        context: Reducer[Text, IntWritable, Text, IntWritable]#Context): Unit = {
      // Explicit iterator loop keeps the block free of Scala-version-specific
      // Java collection converters.
      val it = values.iterator()
      var sum = 0
      while (it.hasNext) {
        sum += it.next().get()
      }
      count.set(sum)
      context.write(key, count)
    }
  }

  /**
   * Configures and runs the job.
   *
   * @param args `args(0)` is the input path, `args(1)` is the output path
   * @return 0 if the job completed successfully, 1 if it failed,
   *         2 if the required arguments are missing
   */
  override def run(args: Array[String]): Int = {
    if (args.length < 2) {
      System.err.println("Usage: WordCount <input path> <output path>")
      2
    } else {
      // NOTE(review): newer Hadoop releases deprecate this constructor in
      // favour of Job.getInstance(conf, name) - confirm the target Hadoop
      // version before switching.
      val job = new Job(getConf(), "WordCount")
      job.setJarByClass(this.getClass)

      job.setOutputKeyClass(classOf[Text])
      job.setOutputValueClass(classOf[IntWritable])

      job.setMapperClass(classOf[Map])
      job.setReducerClass(classOf[Reduce])
      job.setCombinerClass(classOf[Reduce])

      FileInputFormat.addInputPath(job, new Path(args(0)))
      FileOutputFormat.setOutputPath(job, new Path(args(1)))

      if (job.waitForCompletion(true)) 0 else 1
    }
  }

  /**
   * Program entry point: runs this tool via ToolRunner with a fresh
   * Configuration and exits the JVM with the status returned by `run`.
   */
  def main(args: Array[String]): Unit = {
    val conf: Configuration = new Configuration()
    System.exit(ToolRunner.run(conf, this, args))
  }
}
最新文章
- ddd 聚合根 之 聚合与不聚合 设计
- 【ASP.NET 问题】IIS发布网站后出现"处理程序“PageHandlerFactory-Integrated”在其模块列表中有一个错误"的解决办法
- 解决Oracle在scott用户下创建视图(VIEW)权限不足的方法
- NULL指针、零指针、野指针
- html的转码与反转码
- C#世界中的委托
- 【Android接口实现】ActionBar利用整理的一些细节
- 矢量量化(VQ)
- RESTful 的总结
- [Swift]LeetCode144. 二叉树的前序遍历 | Binary Tree Preorder Traversal
- imageio.ffmpeg.download() has been deprecated. Use 'pip install imageio-ffmpeg' instead.'
- canal入门Demo
- 学 Win32 汇编[33] - 探讨 Win32 汇编的模块化编程
- 配置tomcat通过客户端访问
- 集合框架—常见的Set集合
- jquery源码解析:jQuery数据缓存机制详解2
- manjaro 添加tash 快捷方式
- angular路由传参和获取路由参数的方法
- 基于scrapy的分布式爬虫抓取新浪微博个人信息和微博内容存入MySQL
- Django的restful api自动生成工具django-rest-swagger介绍
热门文章
- js中怎么去掉数组的空值
- Navicat工具导出mySQL数据库某个视图结构的.sql脚本
- jquery 给新增的addClass 使用css样式
- h5移动端flexible源码适配终端解读以及常用sass函数
- $x \rightarrow \infty$时多项式型函数的极限
- JavaFX 之自定义窗口标题栏(二)
- 骰子点数概率__dp
- VMware + Ubuntu 16.04环境搭建 + 相关软件安装配置笔记【深度学习】
- 了解IHttpModule接口事件执行顺便 获取Session
- mysql字符集和校对规则(Mysql校对集)