簡體   English   中英

spark - 讀取 json 文件時出現問題

[英]spark - Issue in reading json file

我正在嘗試使用以下代碼讀取 json 文件,但它返回多個錯誤:

val df = sqlcontext.read.json("E:/Dataset/Apps_for_Android_5.json")

請幫助解決此錯誤,提前謝謝。

錯誤

scala> val df = sqlcontext.read.json("E:/Dataset/Apps_for_Android_5.json")
[Stage 2:>                                                         (0 + 4) / 10]
17/01/22 08:15:09 ERROR Executor: Exception in task 2.0 in stage 2.0 (TID 14)
java.util.NoSuchElementException: None.get
        at scala.None$.get(Option.scala:347)
        at scala.None$.get(Option.scala:345)
        at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(Bloc
kInfoManager.scala:343)
        at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockMan
ager.scala:646)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)

        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 WARN TaskSetManager: Lost task 2.0 in stage 2.0 (TID 14, local
host): java.util.NoSuchElementException: None.get
        at scala.None$.get(Option.scala:347)
        at scala.None$.get(Option.scala:345)
        at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(Bloc
kInfoManager.scala:343)
        at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockMan
ager.scala:646)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)

        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)

17/01/22 08:15:09 ERROR TaskSetManager: Task 2 in stage 2.0 failed 1 times; abor
ting job
17/01/22 08:15:09 ERROR Executor: Exception in task 1.0 in stage 2.0 (TID 13)
java.util.NoSuchElementException: None.get
        at scala.None$.get(Option.scala:347)
        at scala.None$.get(Option.scala:345)
        at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(Bloc
kInfoManager.scala:343)
        at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockMan
ager.scala:646)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)

        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 ERROR Executor: Exception in task 4.0 in stage 2.0 (TID 16)
org.apache.spark.TaskKilledException
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:264)

        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 12)
java.util.NoSuchElementException: None.get
        at scala.None$.get(Option.scala:347)
        at scala.None$.get(Option.scala:345)
        at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(Bloc
kInfoManager.scala:343)
        at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockMan
ager.scala:646)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)

        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 ERROR Executor: Exception in task 3.0 in stage 2.0 (TID 15)
java.util.NoSuchElementException: None.get
        at scala.None$.get(Option.scala:347)
        at scala.None$.get(Option.scala:345)
        at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(Bloc
kInfoManager.scala:343)
        at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockMan
ager.scala:646)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)

        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)
17/01/22 08:15:09 WARN TaskSetManager: Lost task 4.0 in stage 2.0 (TID 16, local
host): org.apache.spark.TaskKilledException
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:264)

        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)

org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in sta
ge 2.0 failed 1 times, most recent failure: Lost task 2.0 in stage 2.0 (TID 14,
localhost): java.util.NoSuchElementException: None.get
        at scala.None$.get(Option.scala:347)
        at scala.None$.get(Option.scala:345)
        at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(Bloc
kInfoManager.scala:343)
        at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockMan
ager.scala:646)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)

        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)

Driver stacktrace:
  at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGSched
uler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGSche
duler.scala:1442)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGSche
duler.scala:1441)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:
59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)

  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.appl
y(DAGScheduler.scala:811)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.appl
y(DAGScheduler.scala:811)
  at scala.Option.foreach(Option.scala:257)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.sc
ala:811)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGSche
duler.scala:1667)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGSchedu
ler.scala:1622)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGSchedu
ler.scala:1611)
  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:1873)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:1936)
  at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1065)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:1
51)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:1
12)
  at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
  at org.apache.spark.rdd.RDD.fold(RDD.scala:1059)
  at org.apache.spark.sql.execution.datasources.json.InferSchema$.infer(InferSch
ema.scala:68)
  at org.apache.spark.sql.execution.datasources.json.JsonFileFormat.inferSchema(
JsonFileFormat.scala:62)
  at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(Dat
aSource.scala:421)
  at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$15.apply(Dat
aSource.scala:421)
  at scala.Option.orElse(Option.scala:289)
  at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataS
ource.scala:420)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:149)
  at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:294)
  at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:249)
  ... 52 elided
Caused by: java.util.NoSuchElementException: None.get
  at scala.None$.get(Option.scala:347)
  at scala.None$.get(Option.scala:345)
  at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoM
anager.scala:343)
  at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.s
cala:646)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
  at java.lang.Thread.run(Unknown Source)

看起來這是一個已報告的 Spark 問題——到目前為止尚無明確的原因定位或解決方案:https://issues.apache.org/jira/browse/SPARK-16599

唯一建議的解決方法是降級到 Spark 1.6.2。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM