java实现spark常用算子之Repartitions
2024-08-30 06:22:51
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List; /**
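* repartition operator:
* redistributes the RDD's data into the requested number of partitions (used here to increase the partition count); it always performs a shuffle.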
*/
public class RepartitionsOperator {

    /**
     * Runs a small local Spark job that shows how {@code repartition} moves elements
     * between partitions.
     *
     * <p>Six strings are parallelized into 2 partitions and each one is prefixed with
     * {@code 1[<partition index>]}. The RDD is then repartitioned into 5 partitions and
     * each element is prefixed again with {@code 2[<partition index>]:}, so every printed
     * line shows the partition index before and after the repartition. Results are
     * written to {@code System.err}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("repartitions");

        // JavaSparkContext is Closeable: try-with-resources guarantees the context is
        // shut down even when one of the jobs below throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            List<String> names = Arrays.asList("w1", "w2", "w3", "w4", "w5", "w6");
            JavaRDD<String> nameRdd = sc.parallelize(names, 2);

            // First pass: tag each element with the index of its original partition.
            JavaRDD<String> nameFirstRdd = nameRdd.mapPartitionsWithIndex(
                    new Function2<Integer, Iterator<String>, Iterator<String>>() {
                        @Override
                        public Iterator<String> call(Integer index, Iterator<String> iterator)
                                throws Exception {
                            List<String> tagged = new ArrayList<>();
                            while (iterator.hasNext()) {
                                tagged.add("1[" + index + "]" + iterator.next());
                            }
                            return tagged.iterator();
                        }
                    }, true);

            // Increase the number of partitions from 2 to 5.
            JavaRDD<String> repartitionedRdd = nameFirstRdd.repartition(5);

            // Second pass: tag each element with the index of its partition after the
            // repartition.
            JavaRDD<String> nameSecondRdd = repartitionedRdd.mapPartitionsWithIndex(
                    new Function2<Integer, Iterator<String>, Iterator<String>>() {
                        @Override
                        public Iterator<String> call(Integer index, Iterator<String> iterator)
                                throws Exception {
                            List<String> tagged = new ArrayList<>();
                            while (iterator.hasNext()) {
                                tagged.add("2[" + index + "]:" + iterator.next());
                            }
                            return tagged.iterator();
                        }
                    }, false);

            // Print every doubly-tagged element.
            nameSecondRdd.foreach(new VoidFunction<String>() {
                @Override
                public void call(String s) throws Exception {
                    System.err.println(s);
                }
            });
        }
    }
}
微信扫描下图二维码加入博主知识星球,获取更多大数据、人工智能、算法等免费学习资料哦!
最新文章
- user initialization list vs constructor assignment
- XmlSerializer(Type type, Type[] extraTypes) 内存泄漏
- 《ASP.NET MVC4 WEB编程》学习笔记------UrlHelper
- hdu 1561 The more, The Better (树上背包)
- java 24点算法实现
- ubuntu apache fastcgi 虚拟主机安装
- bootstrap switch功能
- Spark技术内幕:Shuffle Map Task运算结果的处理
- 七.HTTP协议原理介绍
- ios 适配iOS11&iPhoneX的一些坑
- Mac 下配置Lua环境
- MySQL的正则表达式的LIKE和REGEXP区别
- 安装部署Jenkins服务
- WPF中一个控件绑定另一个控件的属性
- Hibernate的多对多实例
- ZOJ 3202 Second-price Auction
- OutputStreamWriter API 以及源码解读
- Effective C++ Item 10,11 Have assignment operators return a reference to *this Handle assignment to self in operator =
- WOW.js 的使用方法
- 团队冲刺Alpha(八)