报错信息:
org.apache.hadoop.io.nativeio.NativeIO$POSIX.stat(Ljava/lang/String;)Lorg/apache/hadoop/io/nativeio/NativeIO$POSIX$Stat;
解释:这是一个 java.lang.UnsatisfiedLinkError,表示 Hadoop 本地库(native IO)缺失或与当前 Hadoop 版本不匹配,导致无法通过本地 IO 访问文件。在 Windows 环境下最常见的原因是缺少 winutils.exe / hadoop.dll。
代码:
object Spark01_WordCount {

  /**
   * Word-count driver: reads every text file matching "datas/*", splits each
   * line into space-separated words, counts occurrences per word, and prints
   * the (word, count) pairs on the driver.
   */
  def main(args: Array[String]): Unit = {
    // Local-mode context for this small demo job.
    val conf = new SparkConf().setMaster("local").setAppName("wordCount")
    val sc = new SparkContext(conf)
    try {
      val lines: RDD[String] = sc.textFile("datas/*")

      // Split each line into individual words (flatMap flattens the
      // per-line arrays into one RDD of words).
      val words: RDD[String] = lines.flatMap(_.split(" "))

      // Count per word with reduceByKey instead of groupBy(...).map(_.size):
      // reduceByKey combines partial counts map-side before the shuffle, so
      // only (word, partialCount) pairs cross the network rather than every
      // individual word occurrence being shuffled and buffered in memory.
      val wordToCount: RDD[(String, Int)] =
        words.map(word => (word, 1)).reduceByKey(_ + _)

      // collect() pulls all results to the driver — acceptable here because
      // the distinct-word set of a demo dataset is small.
      val array: Array[(String, Int)] = wordToCount.collect()
      array.foreach(println)
    } finally {
      // Release the SparkContext even if the job above throws.
      sc.stop()
    }
  }
}
解决方案:
1. 先确认 sc.textFile 的文件路径确实存在且可读,例如:
val lines: RDD[String] = sc.textFile("datas/*")
2. 如果路径无误仍然报错(Windows 下通常如此),需要下载与所用 Hadoop 版本匹配的 winutils.exe 和 hadoop.dll,设置 HADOOP_HOME 环境变量并把两个文件放入 %HADOOP_HOME%\bin(hadoop.dll 也可放入 C:\Windows\System32),然后重启 IDE 再运行。