java

 /**
* 根据单词次数排序的wordcount
* @author Tele
*
*/
public class SortWordCount {
    /** Location of the text file whose words are counted. */
    private static final String INPUT_PATH = "D:\\inputword\\result.txt";

    /**
     * Counts the words of the input file and prints one {@code word----count} line per
     * distinct word, after sorting the pairs by count in descending order.
     *
     * <p>The Spark context is created here rather than in a static initializer, and is
     * managed by try-with-resources so that it is closed even when a job throws.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("sortwordcount");

        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            JavaRDD<String> rdd = jsc.textFile(INPUT_PATH);

            /*
             * Alternative: build the pairs in two steps (flatMap, then mapToPair).
             *
             * JavaRDD<String> lines = rdd.flatMap(new FlatMapFunction<String, String>() {
             *     private static final long serialVersionUID = 1L;
             *
             *     @Override
             *     public Iterator<String> call(String t) throws Exception {
             *         return Arrays.asList(t.split(" ")).iterator();
             *     }
             * });
             *
             * JavaPairRDD<String, Integer> tuples = lines.mapToPair(new PairFunction<String, String, Integer>() {
             *     private static final long serialVersionUID = 1L;
             *
             *     @Override
             *     public Tuple2<String, Integer> call(String t) throws Exception {
             *         return new Tuple2<String, Integer>(t, 1);
             *     }
             * });
             */

            // Split every line on single spaces and emit a (word, 1) pair per token.
            // NOTE: consecutive spaces produce empty tokens, which are counted like any other word.
            JavaPairRDD<String, Integer> tuples = rdd.flatMapToPair(
                    line -> Arrays.stream(line.split(" "))
                            .map(word -> new Tuple2<String, Integer>(word, 1))
                            .iterator());

            // Sum the ones per word to get (word, count).
            JavaPairRDD<String, Integer> wc = tuples.reduceByKey((v1, v2) -> v1 + v2);

            // Swap word and count so that the count becomes the key sortByKey operates on.
            JavaPairRDD<Integer, String> cw = wc.mapToPair(
                    t -> new Tuple2<Integer, String>(t._2, t._1));

            // false = descending order, i.e. the highest count comes first.
            JavaPairRDD<Integer, String> result = cw.sortByKey(false);
            result.foreach(t -> System.out.println(t._2 + "----" + t._1));

            /*
             * Alternative: swap back to (word, count) once sorting is done.
             *
             * JavaPairRDD<String, Integer> result = cw.sortByKey(false).mapToPair(
             *         new PairFunction<Tuple2<Integer, String>, String, Integer>() {
             *     private static final long serialVersionUID = 1L;
             *
             *     @Override
             *     public Tuple2<String, Integer> call(Tuple2<Integer, String> t) throws Exception {
             *         return new Tuple2<String, Integer>(t._2, t._1);
             *     }
             * });
             *
             * result.foreach(new VoidFunction<Tuple2<String, Integer>>() {
             *     private static final long serialVersionUID = 1L;
             *
             *     @Override
             *     public void call(Tuple2<String, Integer> t) throws Exception {
             *         System.out.println(t._1 + "-------" + t._2);
             *     }
             * });
             */
        }
    }
}

scala

 object SortWordCount {
  /**
   * Counts the words of the input file and prints one `word-----count` line per distinct
   * word, after sorting the pairs by count in descending order.
   *
   * The context is stopped in a `finally` block so it is released even when a job fails.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("sortwordcount")
    val sc = new SparkContext(conf)

    try {
      val lines = sc.textFile("D:\\inputword\\result.txt", minPartitions = 1)

      // Split each line on single spaces, pair every token with 1 and sum per word.
      val wordCounts = lines.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)

      wordCounts
        .map(_.swap) // (count, word): the count must be the key for sortByKey
        .sortByKey(ascending = false, numPartitions = 1)
        .map(_.swap) // back to (word, count) for printing
        .foreach { case (word, count) => println(s"$word-----$count") }
    } finally {
      sc.stop()
    }
  }
}

最新文章

  1. android测试点汇总
  2. Java基础之线程——派生自Thread类的子类(TryThread)
  3. 【转】Android 防破解技术简介
  4. Guava文档翻译之 Service
  5. *gravity的取值详表
  6. 用GOACCESS分析NGINX日志
  7. svg 文字
  8. SQL server语句练习
  9. MTK Android Driver:GPIO
  10. SharePoint 2013 添加Ribbon菜单
  11. YYHS-挑战nbc
  12. lintcode.245 子树
  13. IAR中如何定向把数组和函数放在指定的地址单元
  14. tomcat目录映射
  15. React-Native-Storage使用介绍
  16. SHELL异常处理
  17. 分类模型评估之ROC-AUC曲线和PRC曲线
  18. 解决word2016鼠标每点击一下就出现一个保存的圆圈
  19. POSIX多线程—线程基本概念
  20. 比较运算符in/instanceof/typeof 逻辑表达式||/&&

热门文章

  1. groupbox里面添加Form
  2. 看<Asp.net夜话>随笔(2013-10-13)
  3. WebService学习总结(2)——WebService是什么?
  4. 添加asp.net mvc到现有的asp.net web form 应用程序
  5. Python中的Sets数据结构
  6. [D3] Basic Interactivity with D3 v4
  7. LA 3644 - X-Plosives ( 也即UVA 1160)
  8. 《Springboot极简教程》问题解决:Springboot启动报错 Whitelabel Error Page: This application has no explicit mapping for(转)
  9. SQL基础总结——20150730
  10. LeetCode解题报告--2Sum, 3Sum, 4Sum, K Sum求和问题总结