繁体   English   中英

结构化流卡夫卡火花 java.lang.NoClassDefFoundError: org/apache/spark/internal/Logging

[英]Structured Streaming kafka spark java.lang.NoClassDefFoundError: org/apache/spark/internal/Logging

我发现很多关于这个问题的常见问题解答,但不起作用。

I am a newbie for java and bigdata, java dependency management is terrible for me.You have to guess which package and version should be used and which package will conflict if the Third-party libraries don't tell you anything

我想解析来自 kafka 主题的 json 数据并保存到 hbase。

主要代码

package com.yizhisec.bigdata;

import com.yizhisec.bigdata.model.Traffic;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.ForeachWriter;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder;
import org.apache.spark.sql.streaming.StreamingQuery;
import org.apache.spark.sql.streaming.StreamingQueryException;

import java.io.IOException;
import java.util.Properties;

public class KafkaStructStream {

    private Dataset<Row> initStructKafka() throws IOException {
        Properties kafkaProp = Config.getProp();
        SparkSession spark = SparkSession
                .builder()
                .appName("Kafka")
                .master("local[2]")
                .getOrCreate();
        return spark.readStream().format("kafka")
                .option("kafka.bootstrap.servers", kafkaProp.getProperty("kafka.broker.list"))
                .option("kafka.security.protocol", "SSL")
                .option("kafka.ssl.truststore.location", Config.getPath(Config.KAFKA_JKS))
                .option("kafka.ssl.truststore.password", kafkaProp.getProperty("kafka.jks.passwd"))
                .option("startingOffsets", "latest")
                .option("subscribe", kafkaProp.getProperty("kafka.topic"))
                .load();
    }

    private void run() {
        Dataset<Row> df = null;
        try {
            df = initStructKafka();
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
        df.printSchema();
        Dataset<Traffic> ds = df.as(ExpressionEncoder.javaBean(Traffic.class));

        StreamingQuery query = ds.writeStream().foreach(new ForeachWriter<Traffic>() {
            @Override
            public boolean open(long partitionId, long epochId) {
                return false;
            }

            @Override
            public void process(Traffic value) {
                System.out.println(value);
            }

            @Override
            public void close(Throwable errorOrNull) {

            }
        }).start();

//        StreamingQuery query = ds.writeStream().format("console")
//                .trigger(Trigger.Continuous("2 seconds"))
//                .start();

        try {
            query.awaitTermination();
        } catch (StreamingQueryException e) {
            e.printStackTrace();
        }

    }


    public static void main(String[] args) {
        KafkaStructStream k = new KafkaStructStream();
        k.run();
    }

}

交通.class

public class Traffic {
    private Long guid;
    private int time;
    private int end_time;
    private String srcip;
    private String srcmac;
    private int srcport;
    private String destip;
    private String destmac;
    private int destport;
    private String proto;
    private String appproto;
    private Long upsize;
    private Long downsize;

    getter and setter
}

依赖

<properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <spark.version>2.4.4</spark.version>
        <scala.version>2.11.12</scala.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.12</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.12</artifactId>
            <version>${spark.version}</version>
        </dependency>


        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka_2.11</artifactId>
            <version>1.6.3</version>
        </dependency>

        <!--        kafka-->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>1.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql-kafka-0-10_2.12</artifactId>
            <version>2.4.4</version>
            <scope>provided</scope>
        </dependency>
</dependencies>

错误

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/spark/internal/Logging
    at java.lang.ClassLoader.defineClass1(Native Method)
    at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
    at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
    at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
    at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at com.yizhisec.bigdata.KafkaStructStream.initStructKafka(KafkaStructStream.java:20)
    at com.yizhisec.bigdata.KafkaStructStream.run(KafkaStructStream.java:37)
    at com.yizhisec.bigdata.KafkaStructStream.main(KafkaStructStream.java:76)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.internal.Logging
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    ... 15 more

解决方案

经过一番尝试。 最后,我找到了解决方案。 一个非常愚蠢的默认选项浪费了我一天的时间。 我太笨了,找不到 class 已经存在,但是在运行时导入失败,因为提供了 scope 在此处输入图像描述

您必须猜测应该使用哪个 package 和版本

不是真的在猜测... Spark 2.4.x 是使用 Scala 2.12 构建的,并且已记录在案。 你的 POM 说 Scala 2.11.x

您还应该删除spark-streaming-kafka_2.11和 Kafka 依赖项,因为您使用的是结构化流,它需要sql-kafka之一,但未提供,因此删除 scope 标记

如果您总是使用<version>${spark.version}</version> ,那么您不必猜测

旁注:有 Spark Hbase 库,因此您不需要编写自己的 foreach 编写器

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM