[英]ClassNotFoundException spark-submit scala
嗨,我正在尝试生成 Salt 示例的输出,但没有像文档中所述那样使用 Docker。我找到了用于生成输出的 Scala 代码 Main.scala,并把它修改成了便于我自己使用的版本:
package BinExTest
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import software.uncharted.salt.core.projection.numeric._
import software.uncharted.salt.core.generation.request._
import software.uncharted.salt.core.generation.Series
import software.uncharted.salt.core.generation.TileGenerator
import software.uncharted.salt.core.generation.output.SeriesData
import software.uncharted.salt.core.analytic.numeric._
import java.io._
import scala.util.parsing.json.JSONObject
/**
 * Spark driver that tiles taxi pickup locations with Salt and writes the result to the
 * local filesystem: one `.bins` file per tile (TMS path layout) plus a `meta.json`
 * holding the min/max bin value per zoom level.
 */
object Main {

  // Defines the tile size in both x and y bin dimensions
  val tileSize = 256

  // Defines the output layer name
  val layerName = "pickups"

  /**
   * Serializes the bins of a tile into a fixed-size byte array.
   *
   * Each bin value is written as the 8 bytes of its IEEE-754 bit pattern
   * (`java.lang.Double.doubleToLongBits`), least-significant byte first. The array always
   * has `tileSize * tileSize * 8` bytes; positions not covered by a bin stay zero.
   *
   * A `ByteBuffer` is used instead of a hand-maintained index so that no mutable local
   * has to be captured by the `foreach` closure.
   *
   * @param tile the generated tile whose bins are serialized
   * @return the little-endian encoded bin values
   */
  def createByteBuffer(tile: SeriesData[(Int, Int, Int), (Int, Int), Double, (Double, Double)]): Array[Byte] = {
    val buffer = java.nio.ByteBuffer
      .allocate(tileSize * tileSize * 8)
      .order(java.nio.ByteOrder.LITTLE_ENDIAN)
    tile.bins.foreach(b => buffer.putLong(java.lang.Double.doubleToLongBits(b)))
    buffer.array()
  }

  /**
   * Entry point: loads the CSV, generates tiles for every level batch and writes the
   * tile files and the level metadata.
   *
   * @param args optional overrides: `args(0)` is the absolute local path of the input CSV,
   *             `args(1)` is the output directory. When absent, the built-in defaults
   *             are used.
   */
  def main(args: Array[String]): Unit = {
    val jarFile = "/home/kesava/Studies/BinExTest/BinExTest.jar"
    val inputPath =
      if (args.length > 0) args(0)
      else "/home/kesava/Downloads/taxi_micro.csv"
    val outputPath =
      if (args.length > 1) args(1)
      else "/home/kesava/SoftWares/salt/salt-examples/bin-example/Output"

    val conf = new SparkConf().setAppName("salt-bin-example").setJars(Array(jarFile))
    val sc = new SparkContext(conf)

    // Always stop the context, even when a job or a file write fails.
    try {
      val sqlContext = new SQLContext(sc)

      sqlContext.read.format("com.databricks.spark.csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .load(s"file://$inputPath")
        .registerTempTable("taxi_micro")

      // Construct an RDD of Rows containing only the fields we need. Cache the result
      val input = sqlContext.sql("select pickup_lon, pickup_lat from taxi_micro")
        .rdd.cache()

      // Given an input row, return pickup longitude, latitude as a tuple
      val pickupExtractor = (r: Row) => {
        if (r.isNullAt(0) || r.isNullAt(1)) {
          None
        } else {
          Some((r.getDouble(0), r.getDouble(1)))
        }
      }

      // Tile Generator object, which houses the generation logic
      val gen = TileGenerator(sc)

      // Break levels into batches. Process several higher levels at once because the
      // number of tile outputs is quite low. Lower levels done individually due to high tile counts.
      val levelBatches = List(List(0, 1, 2, 3, 4, 5, 6, 7, 8), List(9, 10, 11), List(12), List(13), List(14))

      // Iterate over sets of levels to generate.
      val levelMeta = levelBatches.map(level => {
        println("------------------------------")
        println(s"Generating level $level")
        println("------------------------------")

        // Construct the definition of the tiling jobs: pickups
        val pickups = new Series((tileSize - 1, tileSize - 1),
          pickupExtractor,
          new MercatorProjection(level),
          (r: Row) => Some(1),
          CountAggregator,
          Some(MinMaxAggregator))

        // Create a request for all tiles on these levels, generate
        val request = new TileLevelRequest(level, (coord: (Int, Int, Int)) => coord._1)

        // Extract this series' data once and cache it, so the two actions below
        // (tile export and metadata) work from the same RDD.
        val tiles = gen.generate(input, pickups, request)
          .map(s => pickups(s).get)
          .cache()

        // Translate RDD of Tiles to RDD of (coordinate,byte array), collect to master for serialization
        val output = tiles
          .map(tile => (tile.coords, createByteBuffer(tile)))
          .collect()

        // Save byte files to local filesystem
        output.foreach { case (coord, byteArray) =>
          val limit = (1 << coord._1) - 1
          // Use standard TMS path structure and file naming
          val file = new File(s"$outputPath/$layerName/${coord._1}/${coord._2}/${limit - coord._3}.bins")
          file.getParentFile.mkdirs()
          val out = new FileOutputStream(file)
          try {
            out.write(byteArray)
          } finally {
            out.close()
          }
        }

        // Create map from each level to min / max values.
        val meta = tiles
          .map(t => (t.coords._1.toString, t.tileMeta.get))
          .reduceByKey((l, r) => {
            (Math.min(l._1, r._1), Math.max(l._2, r._2))
          })
          .mapValues(minMax => {
            JSONObject(Map(
              "min" -> minMax._1,
              "max" -> minMax._2
            ))
          })
          .collect()
          .toMap

        tiles.unpersist()
        meta
      })

      // Flatten list of maps into a single map
      val levelInfoJSON = JSONObject(levelMeta.foldLeft(Map.empty[String, JSONObject])(_ ++ _)).toString()

      // Save level metadata to filesystem; make sure the layer directory exists even
      // when no tile file was written.
      val metaFile = new File(s"$outputPath/$layerName/meta.json")
      metaFile.getParentFile.mkdirs()
      val pw = new PrintWriter(metaFile)
      try {
        pw.write(levelInfoJSON)
      } finally {
        pw.close()
      }
    } finally {
      sc.stop()
    }
  }
}
我为此scala创建了一个单独的文件夹,在其中创建了另一个名为lib的文件夹,其中包含所需的jar文件,并使用scalac对其进行了如下编译,
scalac -cp "lib/salt.jar:lib/spark.jar" Main.scala
这成功运行并在BinExTest文件夹下生成了类。
该项目的 build.gradle 中有以下几行代码,从中可以看出用于生成输出数据集的命令:
// Exec task that launches the example through spark-submit; it depends on `assemble`.
task run(overwrite: true, type: Exec, dependsOn: [assemble]) {
// Program to execute.
executable = 'spark-submit'
// Arguments handed to spark-submit: the main class, the application jar, then two further
// values forwarded to the application (presumably the input CSV and the output directory).
args = ["--class","software.uncharted.salt.examples.bin.Main","/opt/salt/build/libs/salt-bin-example-${version}.jar", "/opt/data/taxi_one_day.csv", "/opt/output"]
}
看到这一点,我做了以下命令,
spark-submit --class BinExTest.Main lib/salt.jar
执行此操作时,出现以下错误,
java.lang.ClassNotFoundException: Main.BinExTest
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:278)
    at org.apache.spark.util.Utils$.classForName(Utils.scala:174)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:689)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
有人可以帮我吗? 我对此完全陌生,只是通过探索就走到了这一步。
spark-submit --class BinExTest.Main --jars "BinExTest.jar" "lib/salt.jar"
这样 ClassNotFoundException 消失了,但又出现了新的错误,如下所示:
线程“主”中的异常org.apache.spark.SparkException:由于阶段失败而导致作业中止:阶段3.0中的任务1失败1次,最近一次失败:阶段3.0中的任务1.0(TID 6,本地主机)丢失:java.lang NoSuchMethodError:scala.runtime.IntRef.create(I)Lscala / runtime / IntRef; 在BinExTest.Main $ .createByteBuffer(Main.scala:29)在BinExTest.Main $$ anonfun $ 2 $$ anonfun $ 6.apply(Main.scala:101)在BinExTest.Main $$ anonfun $ 2 $$ anonfun $ 6.apply( main.scala:99)位于scala.collection.Iterator $$ anon $ 11.next(Iterator.scala:328)位于scala.collection.Iterator $ class.foreach(Iterator.scala:727)位于scala.collection.AbstractIterator.foreach (Iterator.scala:1157)在scala.collection.generic.Growable $ class。$ plus $ plus $ eq(Growable.scala:48)在scala.collection.mutable.ArrayBuffer。$ plus $ plus $ eq(ArrayBuffer.scala :103)在scala.collection.mutable.ArrayBuffer。$ plus $ plus $ eq(ArrayBuffer.scala:47)在scala.collection.TraversableOnce $ class.to(TraversableOnce.scala:273)在scala.collection.AbstractIterator.to (Iterator.scala:1157)在scala.collection.TraversableOnce $ class.toBuffer(TraversableOnce.scala:265)在scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)在scala.collection.TraversableOnce $ class.toArray( TraversableOnce.scala:252)在scala.collection.AbstractIterator.toArray(Itera) tor.scala:1157)在org.apache.spark.rdd.RDD $$ anonfun $ collect $ 1 $$ anonfun $ 12.apply(RDD.scala:927)在org.apache.spark.rdd.RDD $$ anonfun $ collect org.apache.spark.SparkContext $$ anonfun $ runJob的$ 1 $ ananfun $ 12.apply(RDD.scala:927)在org.apache.spark.SparkContext $$ anonfun $ runJob的$ 5.apply(SparkContext.scala:1858) org.apache.spark.scheduler.Task.run(Task.scala:89)的org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)的$ 5.apply(SparkContext.scala:1858) .apache.spark.executor.Executor $ TaskRunner.run(Executor.scala:214)at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)at java.util.concurrent.ThreadPoolExecutor $ Worker.run(ThreadPoolExecutor .java:615),位于java.lang.Thread.run(Thread.java:745)
知道发生了什么吗?
2010年6月5日18:39:15错误TaskSetManager:阶段2.0中的任务0失败1次; 正在中止作业线程“主”中的异常org.apache.spark.SparkException:由于阶段失败而中止工作:阶段2.0中的任务0失败1次,最近一次失败:阶段2.0中的任务0.0丢失(TID 3,本地主机):java .lang.NoClassDefFoundError:scala / collection / GenTraversableOnce $ class在software.uncharted.salt.core.util.SparseArray。(SparseArray.scala:37)在software.uncharted.salt.core.util.SparseArray。(SparseArray.scala: 57)at software.uncharted.salt.core.generation.rdd.RDDSeriesWrapper.makeBins(RDDTileGenerator.scala:224)at software.uncharted.salt.core.generation.rdd.RDDTileGeneratorCombiner.createCombiner(RDDTileGenerator.scala:128) .uncharted.salt.core.generation.rdd.RDDTileGenerator $$ anonfun $ 3.apply(RDDTileGenerator.scala:100)在软件.uncharted.salt.core.generation.rdd.RDDTileGenerator $$ anonfun $ 3.apply(RDDTileGenerator.scala: 100)at org.apache.spark.util.collection.ExternalSorter $$ anonfun $ 5.apply(ExternalSorter.scala:187)在org.apache.spark.util.collection.Externa org.apache.spark.util.collection.AppendOnlyMap.changeValue(AppendOnlyMap.scala:148)处的lSorter $$ anonfun $ 5.apply(ExternalSorter.scala:186)在org.apache.spark.util.collection.SizeTrackingAppendOnlyMap.changeValue( org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:192)位于org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)处的SizeTrackingAppendOnlyMap.scala:32) org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)上的.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)在org.apache.spark.scheduler.Task.run(Task .scala:89),位于org.apache.spark.executor.Executor $ TaskRunner.run(Executor.scala:214),位于java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145),位于java.util.concurrent。 java.lang.Thread.run(Thread.java:745)上的ThreadPoolExecutor $ Worker.run(ThreadPoolExecutor.java:615)原因:java.lang.ClassNotFoundException:scala.collection.GenTraversableOnce $ class在 java.net.URLClassLoader的java.net.URLClassLoader $ 
1.run(URLClassLoader.java:366)在java.net.URLClassLoader $ 1.run(URLClassLoader.java:355)在java.security.AccessController.doPrivileged(本机方法)。在java.lang.ClassLoader.loadClass(ClassLoader.java:425)的findClass(URLClassLoader.java:354)在java.lang.ClassLoader.loadClass(ClassLoader.java:358)的findClass(URLClassLoader.java:354)
这是因为 Scala 2.11 中没有上面提到的那个类吗?
spark-submit --class "BinExTest.Main" --jars "BinExTest.jar,lib/scala210.jar" "lib/salt.jar"
为了运行Spark作业,它需要在组成您的Spark集群的不同节点上自我复制代码。 它是通过将jar文件直接复制到其他节点来实现的。
这意味着您需要确保您的类文件打包在.jar文件中。 在我的典型解决方案中,我将构建一个将类文件和从属jar文件打包到一个.jar文件中的Uber jar。 为此,我使用了Maven Shade插件 。 那不一定是您的解决方案,但是至少您应该从生成的类中构建一个.jar文件。
要手动提供其他 jar 文件,您需要使用 --jars 选项添加它们,该选项接受一个以逗号分隔的列表。
实际上,就连我自己也对所有可用选项感到相当困惑,尤其是 jar 文件及其分发方式,以及如何在 Spark 中修改类路径。可以参考我刚刚发布的另一个主题。
对于您的问题的第二部分,该问题已经在另一个线程上得到了回答。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.