Spark - Action 实战

package cn.rzlee.spark.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Small runnable demonstrations of Spark RDD actions: `reduce`, `foreach`
 * (in the method named `collect`), `count`, `take`, `saveAsTextFile` and
 * `countByKey`.
 *
 * Every demo builds its own single-threaded local `SparkContext`
 * (`local[1]`) and stops it when the demo finishes, so the demos can be
 * run one after another in the same JVM.
 */
object ActionOperation {

  /** Entry point: uncomment the demo you want to run. */
  def main(args: Array[String]): Unit = {
    //reduce()
    //collect()
    //count()
    //take()
    //saveAsTextFile()
    countByKey()
  }

  /**
   * Runs `body` with a freshly created local `SparkContext` and always stops
   * the context afterwards, even when `body` throws.
   *
   * @param body the work to perform with the context
   * @return whatever `body` returns
   */
  private def withLocalContext[T](body: SparkContext => T): T = {
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[1]")
    val sc = new SparkContext(conf)
    try body(sc)
    finally sc.stop() // release the context so a later demo can create its own
  }

  /** Builds the single-partition RDD of the integers 1 to 10 shared by the numeric demos. */
  private def numbersRdd(sc: SparkContext): RDD[Int] =
    sc.parallelize(Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1)

  /** Sums the numbers 1 to 10 with the `reduce` action and prints the result. */
  def reduce(): Unit = withLocalContext { sc =>
    val sum: Int = numbersRdd(sc).reduce(_ + _)
    println(sum)
  }

  /**
   * Doubles every number and prints each result.
   *
   * NOTE(review): despite its name, this demo does not call `RDD.collect()`;
   * it iterates the RDD with `foreach`. The original author's note explains
   * the choice: `foreach` runs on the remote machines instead of pulling the
   * data to the local side and processing it element by element, so it
   * performs much better than `collect`.
   */
  def collect(): Unit = withLocalContext { sc =>
    val doubleNumbers: RDD[Int] = numbersRdd(sc).map(num => num * 2)
    doubleNumbers.foreach(num => println(num))
  }

  /** Counts the elements of the numbers RDD with the `count` action and prints the total. */
  def count(): Unit = withLocalContext { sc =>
    val total: Long = numbersRdd(sc).count()
    println(total)
  }

  /** Fetches the first three elements with the `take` action and prints each one. */
  def take(): Unit = withLocalContext { sc =>
    val firstThree: Array[Int] = numbersRdd(sc).take(3)
    firstThree.foreach(println)
  }

  /** Writes the numbers RDD as text files under a hard-coded local Windows directory. */
  def saveAsTextFile(): Unit = withLocalContext { sc =>
    numbersRdd(sc).saveAsTextFile("C:\\Users\\txdyl\\Desktop\\log\\out\\saveAsTest\\")
  }

  /** Counts the students per class with the `countByKey` action and prints the resulting map. */
  def countByKey(): Unit = withLocalContext { sc =>
    val studentList = Array(
      ("class1", "tom"),
      ("class2", "leo"),
      ("class1", "jeo"),
      ("class2", "jime")
    )
    val students: RDD[(String, String)] = sc.parallelize(studentList, 1)
    val studentsCounts: collection.Map[String, Long] = students.countByKey()
    println(studentsCounts)
  }
}

最新文章

  1. 我的博客CSS
  2. angularjs 中的setTimeout(),setInterval() / $interval 和 $timeout
  3. js动画之同时运动
  4. hdu3294 girl‘s research
  5. Window_搭建SVN服务器
  6. Lintcode: Search Range in Binary Search Tree
  7. 表达式语言之java对正则表达式的处理
  8. U3D中的协同等待函数
  9. C# 实现3Des加密 解密
  10. Magento布局layout.xml文件详解
  11. Aliyun EMR 集群重启
  12. Main Memory Object-Relational Database Management System
  13. msf向存在漏洞的apk注入payload
  14. Linux-day1-pdf课件
  15. 微信小程序转发微信小程序转发
  16. UVA10054-The Necklace(无向图欧拉回路——套圈算法)
  17. matlab 字符串处理函数
  18. 抓取awr、语句级awr、ashrpt
  19. Properties类与配置文件
  20. 利用python实现冒泡排序

热门文章

  1. qtav----ffmpeg在ubuntu和win10上的编译和运行
  2. k8s部署dashboard
  3. 【Mac系统 + Python + Django】之搭建第一个【Django Demo(一)】
  4. 常用PhpStorm 快捷键
  5. Memcached 常用的方法
  6. UFLDL深度学习笔记 (五)自编码线性解码器
  7. python学习 01 变量
  8. Linux tar包安装Nginx
  9. linux php.ini又一次载入问题
  10. Linux下,部署多个Tomcat