运行

mport org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext} /**
* Created by Lee_Rz on 2017/8/30.
*/
object SparkDemo {
def main(args: Array[String]) {
Logger.getLogger("org.apache.spark").setLevel(Level.OFF)
val sc: SparkContext = new SparkContext(new SparkConf().setAppName(this.getClass().getName()).setMaster("local[2]"))
val rdd1: RDD[String] = sc.textFile("C:\\Users\\166\\Desktop\\text.txt") //一行一行的读数据 //懒算子
val key: RDD[(String, Int)] = rdd1.flatMap(_.split(" ")).map((_,)).reduceByKey(_+_)
println(key.collect().toBuffer)//收集到Driver
}
}

报错

Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
// :: WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
// :: INFO Slf4jLogger: Slf4jLogger started
// :: INFO Remoting: Starting remoting
// :: INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriverActorSystem@192.168.0.166:51388]
// :: ERROR Shell: Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:)
at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:)
at org.apache.spark.SparkContext$$anonfun$hadoopFile$$$anonfun$.apply(SparkContext.scala:)
at org.apache.spark.SparkContext$$anonfun$hadoopFile$$$anonfun$.apply(SparkContext.scala:)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$.apply(HadoopRDD.scala:)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$.apply(HadoopRDD.scala:)
at scala.Option.map(Option.scala:)
at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:)
at org.apache.spark.rdd.RDD$$anonfun$partitions$.apply(RDD.scala:)
at org.apache.spark.rdd.RDD$$anonfun$partitions$.apply(RDD.scala:)
at scala.Option.getOrElse(Option.scala:)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:)
at org.apache.spark.rdd.RDD$$anonfun$partitions$.apply(RDD.scala:)
at org.apache.spark.rdd.RDD$$anonfun$partitions$.apply(RDD.scala:)
at scala.Option.getOrElse(Option.scala:)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:)
at org.apache.spark.rdd.RDD$$anonfun$partitions$.apply(RDD.scala:)
at org.apache.spark.rdd.RDD$$anonfun$partitions$.apply(RDD.scala:)
at scala.Option.getOrElse(Option.scala:)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:)
at org.apache.spark.rdd.RDD$$anonfun$partitions$.apply(RDD.scala:)
at org.apache.spark.rdd.RDD$$anonfun$partitions$.apply(RDD.scala:)
at scala.Option.getOrElse(Option.scala:)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:)
at org.apache.spark.Partitioner$.defaultPartitioner(Partitioner.scala:)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$reduceByKey$.apply(PairRDDFunctions.scala:)
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$reduceByKey$.apply(PairRDDFunctions.scala:)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:)
at org.apache.spark.rdd.PairRDDFunctions.reduceByKey(PairRDDFunctions.scala:)
at zx.SparkDemo$.main(SparkDemo.scala:)
at zx.SparkDemo.main(SparkDemo.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:)
at java.lang.reflect.Method.invoke(Method.java:)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:)
// :: INFO FileInputFormat: Total input paths to process :
// :: INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
// :: INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
// :: INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
// :: INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
// :: INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id
ArrayBuffer((are,), (hello,), (any,), (ok,), (world,), (me,), (alone,), (you,), (no,), (believie,), (more,))
// :: INFO RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon. Process finished with exit code

检查发现hadoop下bin目录下已经存在winutils.exe,检查hadoop的path路径,发现没有严格按照格式创建hadoop的path,真确的格式是HADOOP_HOME=......,因为在hadoop的生态圈中很多框架都是依赖hadoop的,所以他们的配置文件中,默认的export的hadoop路径是格式是HADOOP_HOME

最新文章

  1. position导致Safari工具栏不自动隐藏
  2. openstack-swift云存储部署(二)
  3. jprofiler_监控远程linux服务器的tomcat进程(实践)
  4. Reverse Integer
  5. java时间段分成小段存储
  6. CodeForces Round 193 Div2
  7. 时间管理-SMART原则
  8. VC2010 MFC文档类菜单快捷键无法加载问题
  9. 系统学习sqlserver2012 一
  10. Kali+Win7双系统
  11. Android四大基本组件
  12. margin,border,padding简介
  13. 【转】VS 代码行数统计
  14. 关于什么时候用pop什么时候用dismiss
  15. c3p0连接数据库
  16. 请简述javascript的数据类型种类?
  17. hdu5384(trie树)
  18. c++ string 转double
  19. 浅谈Unity3D 骨骼动画
  20. 每天一个linux命令:chmod

热门文章

  1. PhoneNumber
  2. props default 数组(Array)/对象(Object)的默认值应当由一个工厂函数返回
  3. vue Object.freeze() 优化
  4. 【LeetCode】84. Largest Rectangle in Histogram——直方图最大面积
  5. nginx+keepalived实现双机热备高可用性
  6. jenkins构建一个go项目
  7. React学习之事件绑定
  8. Spring学习五----------Bean的配置之Bean的生命周期
  9. 扩展MongoDB C# Driver的QueryBuilder
  10. js高度line-height及宽度text-align:center居中插件