[英]Scala Spark Encoders.product[X] (where X is a case class) keeps giving me "No TypeTag available for X" error
I am working in IntelliJ IDEA, in a Scala worksheet.我正在使用 IntelliJ IDEA,在 Scala 工作表中。 I want to create an encoder for a Scala case class.我想为 Scala 样例类(case class)创建一个编码器。 From various posts on the internet I found the suggestion to use Encoders.product.从互联网上的各种帖子中,我发现了使用 Encoders.product 的建议。 But it has never worked for me.但它对我从来不起作用。
The following code以下代码
// Scala 3 worksheet that reproduces the "No TypeTag available for classa" compile error.
import org.apache.spark.sql.*
// Local Spark session named "test".
val spark: SparkSession =
SparkSession
.builder()
.appName("test")
.master("local")
.getOrCreate()
// Brings the given instances defined in scala3encoders into scope.
import scala3encoders.given
case class classa(i: Int, j: Int, s: String)
// This is the line the compiler rejects: Encoders.product expects an implicit TypeTag.
val enc = Encoders.product[classa]
keeps throwing this error:一直抛出以下错误:
-- Error: ----------------------------------------------------------------------
1 |val enc = Encoders.product[classa]
| ^
| No TypeTag available for classa
1 error found
Does anyone know what's going on there?有谁知道那里发生了什么?
The content of build.sbt file is: build.sbt文件内容为:
// The project itself is compiled with Scala 3.
scalaVersion := "3.1.3"

scalacOptions ++= Seq("-language:implicitConversions", "-deprecation")

libraryDependencies ++= {
  val sparkVersion = "3.2.0"

  // Spark modules are resolved with the Scala 2.13 suffix (CrossVersion.for3Use2_13)
  // and passed through `excludes` like the other runtime dependencies.
  def sparkModule(artifact: String): ModuleID =
    excludes(("org.apache.spark" %% artifact % sparkVersion).cross(CrossVersion.for3Use2_13))

  Seq(
    sparkModule("spark-core"),
    sparkModule("spark-sql"),
    excludes("io.github.vincenzobaz" %% "spark-scala3" % "0.1.3"),
    "org.scalameta" %% "munit" % "0.7.26" % Test
  )
}

//netty-all replaces all these excludes
/** Returns `m` with every (organization, artifact) pair listed below excluded, in order. */
def excludes(m: ModuleID): ModuleID = {
  val unwanted = Seq(
    "io.netty" -> "netty-common",
    "io.netty" -> "netty-handler",
    "io.netty" -> "netty-transport",
    "io.netty" -> "netty-buffer",
    "io.netty" -> "netty-codec",
    "io.netty" -> "netty-resolver",
    "io.netty" -> "netty-transport-native-epoll",
    "io.netty" -> "netty-transport-native-unix-common",
    "javax.xml.bind" -> "jaxb-api",
    "jakarta.xml.bind" -> "jaxb-api",
    "javax.activation" -> "activation",
    "jakarta.annotation" -> "jakarta.annotation-api",
    "javax.annotation" -> "javax.annotation-api"
  )
  unwanted.foldLeft(m) { case (module, (organization, artifact)) =>
    module.exclude(organization, artifact)
  }
}

// Without forking, ctrl-c doesn't actually fully stop Spark
run / fork := true
Test / fork := true
Encoders.product[classa] is a Scala 2 thing.Encoders.product[classa] 是 Scala 2 的东西。 This method accepts an implicit TypeTag.此方法接受隐式 TypeTag。 There are no TypeTags in Scala 3.Scala 3 中没有 TypeTag。 In Scala 3 the library maintainers propose to work in the following way:在 Scala 3 中,库维护者建议按以下方式工作:
https://github.com/vincenzobaz/spark-scala3/blob/main/examples/src/main/scala/sql/StarWars.scala https://github.com/vincenzobaz/spark-scala3/blob/main/examples/src/main/scala/sql/StarWars.scala
package sql
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.{Dataset, DataFrame, SparkSession}
import org.apache.spark.sql.functions._
import org.apache.spark.sql._
/** Example application: builds typed Datasets from in-memory tuples and a CSV file,
  * using the encoders supplied by `scala3encoders.given`.
  */
object StarWars extends App:
  val spark = SparkSession.builder().master("local").getOrCreate()
  import spark.implicits.localSeqToDatasetHolder
  import scala3encoders.given

  // Gives any Seq[T] with an Encoder[T] in scope a `toDS` method.
  extension [T: Encoder](seq: Seq[T])
    def toDS: Dataset[T] =
      localSeqToDatasetHolder(seq).toDS

  case class Friends(name: String, friends: String)

  // (name, friends) tuples converted into a Dataset[Friends].
  val df: Dataset[Friends] =
    val namePairs = Seq(
      ("Yoda", "Obi-Wan Kenobi"),
      ("Anakin Skywalker", "Sheev Palpatine"),
      ("Luke Skywalker", "Han Solo, Leia Skywalker"),
      ("Leia Skywalker", "Obi-Wan Kenobi"),
      ("Sheev Palpatine", "Anakin Skywalker"),
      ("Han Solo", "Leia Skywalker, Luke Skywalker, Obi-Wan Kenobi, Chewbacca"),
      ("Obi-Wan Kenobi", "Yoda, Qui-Gon Jinn"),
      ("R2-D2", "C-3PO"),
      ("C-3PO", "R2-D2"),
      ("Darth Maul", "Sheev Palpatine"),
      ("Chewbacca", "Han Solo"),
      ("Lando Calrissian", "Han Solo"),
      ("Jabba", "Boba Fett")
    )
    namePairs.toDS.map((who, pals) => Friends(who, pals))

  val friends = df.as[Friends]
  friends.show()

  case class FriendsMissing(who: String, friends: Option[String])

  // Same idea, but the second column is optional.
  val dsMissing: Dataset[FriendsMissing] =
    val optionalPairs = Seq(
      ("Yoda", Some("Obi-Wan Kenobi")),
      ("Anakin Skywalker", Some("Sheev Palpatine")),
      ("Luke Skywalker", Option.empty[String]),
      ("Leia Skywalker", Some("Obi-Wan Kenobi")),
      ("Sheev Palpatine", Some("Anakin Skywalker")),
      ("Han Solo", Some("Leia Skywalker, Luke Skywalker, Obi-Wan Kenobi"))
    )
    optionalPairs.toDS.map((who, maybePals) => FriendsMissing(who, maybePals))

  dsMissing.show()

  case class Character(
      name: String,
      height: Int,
      weight: Option[Int],
      eyecolor: Option[String],
      haircolor: Option[String],
      jedi: String,
      species: String
  )

  // Reads StarWars.csv (header row, ';' delimiter, inferred schema) as Dataset[Character].
  val characters: Dataset[Character] =
    val csvReader = spark.sqlContext
      .read
      .option("header", "true")
      .option("delimiter", ";")
      .option("inferSchema", "true")
    csvReader.csv("StarWars.csv").as[Character]

  characters.show()

  // Join the characters with their friends on the shared "name" column.
  val sw_df = characters.join(friends, Seq("name"))
  sw_df.show()

  case class SW(
      name: String,
      height: Int,
      weight: Option[Int],
      eyecolor: Option[String],
      haircolor: Option[String],
      jedi: String,
      species: String,
      friends: String
  )

  val sw_ds = sw_df.as[SW]
So if you really need Encoders.product[classa], compile this part of your code with Scala 2:所以如果你真的需要 Encoders.product[classa],请用 Scala 2 编译你的这部分代码:
src/App.scala src/App.scala
// this is Scala 3
object App:
  /** Entry point: prints the schema exposed by `App1`. */
  def main(args: Array[String]): Unit =
    val derivedSchema = App1.schema
    // Seq(StructField(i,IntegerType,false), StructField(j,IntegerType,false), StructField(s,StringType,true))
    println(derivedSchema)
scala2/src/main/scala/App1.scala scala2/src/main/scala/App1.scala
import org.apache.spark.sql._
// this is Scala 2
object App1 {

  /** Schema of the product encoder that Spark builds for `classa`. */
  val schema: types.StructType = {
    val encoder = Encoders.product[classa]
    encoder.schema
  }
}
common/src/main/scala/classa.scala common/src/main/scala/classa.scala
/** Plain record with two integer fields and one string field. */
case class classa(
  i: Int,
  j: Int,
  s: String
)
build.sbt build.sbt
// Versions and Spark coordinates shared by the sub-projects below.
lazy val scala3V = "3.1.3"
lazy val scala2V = "2.13.8"

lazy val sparkCore = "org.apache.spark" %% "spark-core" % "3.2.0"
lazy val sparkSql = "org.apache.spark" %% "spark-sql" % "3.2.0"

// Scala 3 application; depends on the Scala 2 helper module and the shared model.
lazy val root = project
  .in(file("."))
  .settings(
    scalaVersion := scala3V,
    scalacOptions ++= Seq("-language:implicitConversions", "-deprecation"),
    libraryDependencies ++= Seq(
      excludes(sparkCore.cross(CrossVersion.for3Use2_13)),
      excludes(sparkSql.cross(CrossVersion.for3Use2_13)),
      excludes("io.github.vincenzobaz" %% "spark-scala3" % "0.1.3"),
      "org.scalameta" %% "munit" % "0.7.26" % Test
    )
  )
  .dependsOn(scala2, common)

// Scala 2 module: the code that calls Encoders.product lives here.
lazy val scala2 = project
  .settings(
    scalaVersion := scala2V,
    libraryDependencies ++= Seq(sparkCore, sparkSql)
  )
  .dependsOn(common)

// Shared model classes, cross-built for both Scala versions.
lazy val common = project
  .settings(
    scalaVersion := scala3V,
    crossScalaVersions := Seq(scala2V, scala3V)
  )

//netty-all replaces all these excludes
/** Returns `m` with every (organization, artifact) pair listed below excluded, in order. */
def excludes(m: ModuleID): ModuleID = {
  val unwanted = Seq(
    "io.netty" -> "netty-common",
    "io.netty" -> "netty-handler",
    "io.netty" -> "netty-transport",
    "io.netty" -> "netty-buffer",
    "io.netty" -> "netty-codec",
    "io.netty" -> "netty-resolver",
    "io.netty" -> "netty-transport-native-epoll",
    "io.netty" -> "netty-transport-native-unix-common",
    "javax.xml.bind" -> "jaxb-api",
    "jakarta.xml.bind" -> "jaxb-api",
    "javax.activation" -> "activation",
    "jakarta.annotation" -> "jakarta.annotation-api",
    "javax.annotation" -> "javax.annotation-api"
  )
  unwanted.foldLeft(m) { case (module, (organization, artifact)) =>
    module.exclude(organization, artifact)
  }
}

// Without forking, ctrl-c doesn't actually fully stop Spark
run / fork := true
Test / fork := true
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.