spark 基于key排序的wordcount
2024-08-31 22:01:16
java
/**
* 根据单词次数排序的wordcount
* @author Tele
*
*/
public class SortWordCount {
private static SparkConf conf = new SparkConf().setMaster("local").setAppName("sortwordcount");
private static JavaSparkContext jsc = new JavaSparkContext(conf);
private static String path = "D:\\inputword\\result.txt"; public static <U> void main(String[] args) {
JavaRDD<String> rdd = jsc.textFile(path); /*
* JavaRDD<String> lines = rdd.flatMap(new FlatMapFunction<String,String>() {
*
* private static final long serialVersionUID = 1L;
*
* @Override public Iterator<String> call(String t) throws Exception { return
* Arrays.asList(t.split(" ")).iterator(); } });
*
* JavaPairRDD<String, Integer> tuples = lines.mapToPair(new
* PairFunction<String,String,Integer>() {
*
* private static final long serialVersionUID = 1L;
*
* @Override public Tuple2<String,Integer> call(String t) throws Exception {
* return new Tuple2<String,Integer>(t,1); } });
*/ JavaPairRDD<String, Integer> tuples = rdd.flatMapToPair(new PairFlatMapFunction<String, String, Integer>() { private static final long serialVersionUID = 1L; @Override
public Iterator<Tuple2<String, Integer>> call(String t) throws Exception {
Stream<Tuple2<String, Integer>> stream = Arrays.asList(t.split(" ")).stream()
.map(i -> new Tuple2<>(i, 1));
return stream.iterator();
}
}); JavaPairRDD<String, Integer> wc = tuples.reduceByKey(new Function2<Integer, Integer, Integer>() { private static final long serialVersionUID = 1L; @Override
public Integer call(Integer v1, Integer v2) throws Exception {
return v1 + v2;
}
}); // 将词频与单词互换位置
JavaPairRDD<Integer, String> cw = wc.mapToPair(new PairFunction<Tuple2<String, Integer>, Integer, String>() { private static final long serialVersionUID = 1L; @Override
public Tuple2<Integer, String> call(Tuple2<String, Integer> t) throws Exception {
return new Tuple2<Integer, String>(t._2, t._1);
}
}); JavaPairRDD<Integer, String> result = cw.sortByKey(false);
result.foreach(new VoidFunction<Tuple2<Integer, String>>() { private static final long serialVersionUID = 1L; @Override
public void call(Tuple2<Integer, String> t) throws Exception {
System.out.println(t._2 + "----" + t._1);
}
}); // 也可以在排序完毕后换成单词-词频的形式
/*
* JavaPairRDD<String, Integer> result = cw.sortByKey(false).mapToPair(new
* PairFunction<Tuple2<Integer,String>,String,Integer>() {
*
* private static final long serialVersionUID = 1L;
*
* @Override public Tuple2<String,Integer> call(Tuple2<Integer, String> t)
* throws Exception { return new Tuple2<String,Integer>(t._2,t._1); } });
*
* result.foreach(new VoidFunction<Tuple2<String,Integer>>() {
*
* private static final long serialVersionUID = 1L;
*
* @Override public void call(Tuple2<String, Integer> t) throws Exception {
* System.out.println(t._1 + "-------" + t._2); } });
*/ jsc.close();
}
}
scala
object SortWordCount {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setMaster("local").setAppName("sortwordcount");
val sc = new SparkContext(conf); val rdd = sc.textFile("D:\\inputword\\result.txt", 1); val wordcount = rdd.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _);
wordcount.map(t => (t._2, t._1)).sortByKey(false, 1).map(t => (t._2, t._1)).foreach(t => println(t._1 + "-----" + t._2)); }
}
最新文章
- android测试点汇总
- Java基础之线程——派生自Thread类的子类(TryThread)
- 【转】Android 防破解技术简介
- Guava文档翻译之 Service
- *gravity的取值详表
- 用GOACCESS分析NGINX日志
- svg 文字
- SQL server语句练习
- MTK Android Driver:GPIO
- SharePoint 2013 添加Ribbon菜单
- YYHS-挑战nbc
- lintcode.245 子树
- IAR中如何定向把数组和函数放在指定的地址单元
- tomcat目录映射
- React-Native-Storage使用介绍
- SHELL异常处理
- 分类模型评估之ROC-AUC曲线和PRC曲线
- 解决word2016鼠标每点击一下就出现一个保存的圆圈
- POSIX多线程—线程基本概念
- 比较运算符in/instanceof/typeof 逻辑表达式||/&;&;
热门文章
- groupbox里面添加Form
- 看<;Asp.net夜话>;随笔(2013-10-13)
- WebService学习总结(2)——WebService是什么?
- 添加asp.net mvc到现有的asp.net web form 应用程序
- Python中的Sets数据结构
- [D3] Basic Interactivity with D3 v4
- LA 3644 - X-Plosives ( 也即UVA 1160)
- 《Springboot极简教程》问题解决:Springboot启动报错 Whitelabel Error Page: This application has no explicit mapping for(转)
- SQL基础总结——20150730
- LeetCode解题报告--2Sum, 3Sum, 4Sum, K Sum求和问题总结