简体   繁体   English

无法在Dataflow SDK 2.1.0中创建可执行Jar

[英]Unable to create executable Jar in Dataflow SDK 2.1.0

I created Jar with the following pom file. 我使用以下pom文件创建了Jar。

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>XXXXXXXXXXXXX</groupId>
<artifactId>XXXXXXXXXXXX</artifactId>
<version>XXXXXXXXXXXXX</version>
<packaging>jar</packaging>

<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <java.version>1.8</java.version>
    <dependency.locations.enabled>false</dependency.locations.enabled>
</properties>

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.1</version>
            <configuration>
                <source>${java.version}</source>
                <target>${java.version}</target>
                <compilerArgs>
                    <arg>-Xlint:all</arg>
                    <arg>-Xlint:-processing</arg>
                </compilerArgs>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.4.1</version>
            <configuration>
                <!-- get all project dependencies -->
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <!-- MainClass in mainfest make a executable jar -->
                <archive>
                    <manifest>
                        <mainClass>XXXXXXXXXXXXXXXX</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <!-- bind to the packaging phase -->
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <plugin>
            <groupId>org.jacoco</groupId>
            <artifactId>jacoco-maven-plugin</artifactId>
            <version>0.7.9</version>
            <executions>
                 <execution>
                     <id>prepare-agent</id>
                     <goals>
                         <goal>prepare-agent</goal>
                     </goals>
                 </execution>
            </executions>
        </plugin>
    </plugins>
</build>

<reporting>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-project-info-reports-plugin</artifactId>
            <version>2.9</version>
            <reportSets>
                <reportSet>
                    <reports>
                        <report>summary</report>
                    </reports>
                </reportSet>
            </reportSets>
        </plugin>

        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jxr-plugin</artifactId>
            <version>2.5</version>
        </plugin>

        <plugin>
            <groupId>org.codehaus.mojo</groupId>
            <artifactId>findbugs-maven-plugin</artifactId>
            <version>3.0.4</version>
            <!--<version>3.0.5-SNAPSHOT</version> -->
            <configuration>
                <effort>Max</effort>
                <threshold>Low</threshold>
                <xmlOutput>true</xmlOutput>
            </configuration>
        </plugin>

        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-pmd-plugin</artifactId>
            <version>3.7</version>
            <configuration>
                <rulesets>
                    <ruleset>/rulesets/java/basic.xml</ruleset>
                    <ruleset>/rulesets/java/design.xml</ruleset>
                </rulesets>
            </configuration>
        </plugin>

        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-checkstyle-plugin</artifactId>
            <version>2.17</version>
            <reportSets>
                <reportSet>
                    <reports>
                        <report>checkstyle</report>
                    </reports>
                </reportSet>
            </reportSets>
        </plugin>

    </plugins>
</reporting>

<dependencies>
    <dependency>
        <groupId>com.google.cloud.dataflow</groupId>
        <artifactId>google-cloud-dataflow-java-sdk-all</artifactId>
        <version>2.1.0</version>
    </dependency>

    <dependency>
        <groupId>com.google.cloud</groupId>
        <artifactId>google-cloud-storage</artifactId>
        <version>1.2.0</version>
    </dependency>

    <dependency>
        <groupId>javax.servlet</groupId>
        <artifactId>javax.servlet-api</artifactId>
        <version>3.1.0</version>
    </dependency>

    <dependency>
        <groupId>org.jmockit</groupId>
        <artifactId>jmockit</artifactId>
        <version>1.30</version>
    </dependency>

    <dependency>
        <groupId>org.mockito</groupId>
        <artifactId>mockito-all</artifactId>
        <version>1.10.19</version>
        <scope>test</scope>
    </dependency>

    <!-- Hamcrest and JUnit are required dependencies of DataflowAssert, which 
        is used in the main code of DebuggingWordCount example. -->

    <dependency>
        <groupId>org.hamcrest</groupId>
        <artifactId>hamcrest-all</artifactId>
        <version>1.3</version>
    </dependency>

    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>

    <dependency>
        <groupId>commons-lang</groupId>
        <artifactId>commons-lang</artifactId>
        <version>2.6</version>
    </dependency>
    <dependency>
        <groupId>ch.qos.logback</groupId>
        <artifactId>logback-classic</artifactId>
        <version>1.0.7</version>
    </dependency>
</dependencies>

maven command Maven命令

mvn clean
mvn compile
mvn package

Jar Command to execute Jar命令执行

 java -jar XXXXXX-jar-with-dependencies.jar
      --runner=DataflowRunner
      --jobName=XXXXXXXXXX 
      --project=XXXXXXXXXX 
      --network=XXXXXXXXXX
      --subnetwork=XXXXXXXXXXXX 
      --workerMachineType=XXXXXXX 
      --region=XXXXXXXXXX 
      --maxNumWorkers=XXX 
      --stagingLocation=XXXXXXXXXX 
      --tempLocation=XXXXXXXXXXXX
      --dataflowJobFile=XXXXXXXXXX

The following error occurred 发生以下错误

java.lang.IllegalArgumentException: Unknown 'runner' specified 'DataflowRunner', supported pipeline runners [DirectRunner]
at org.apache.beam.sdk.options.PipelineOptionsFactory.parseObjects(PipelineOptionsFactory.java:1615)
at org.apache.beam.sdk.options.PipelineOptionsFactory.access$400(PipelineOptionsFactory.java:104)
at org.apache.beam.sdk.options.PipelineOptionsFactory$Builder.as(PipelineOptionsFactory.java:291)
Caused by: java.lang.ClassNotFoundException: DataflowRunner
at java.net.URLClassLoader.findClass(URLClassLoader.java:381) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:264)

As a workaround for the above error, we added the following to the pom file and use DirectRunner only when testing. 作为上述错误的解决方法,我们将以下内容添加到pom文件中,并且仅在测试时使用DirectRunner。

    <dependency>
        <groupId>org.apache.beam</groupId>
        <artifactId>beam-runners-direct-java</artifactId>
        <version>2.1.0</version>
        <scope>test</scope>
    </dependency>

When executing with the above workaround, the following error occurred. 使用上述解决方法执行时,发生以下错误。

java.lang.IllegalArgumentException: Unable to infer a coder and no Coder was specified. Please set a coder by invoking Create.withCoder() explicitly.
at org.apache.beam.sdk.transforms.Create$Values.expand(Create.java:318) 
at org.apache.beam.sdk.transforms.Create$Values.expand(Create.java:268)
at org.apache.beam.sdk.Pipeline.applyInternal(Pipeline.java:514)
at org.apache.beam.sdk.Pipeline.applyTransform(Pipeline.java:473)
at org.apache.beam.sdk.values.PBegin.apply(PBegin.java:56)
at org.apache.beam.sdk.Pipeline.apply(Pipeline.java:180)
at org.apache.beam.sdk.io.gcp.bigquery.BatchLoads.expand(BatchLoads.java:344)
at org.apache.beam.sdk.io.gcp.bigquery.BatchLoads.expand(BatchLoads.java:67)
at org.apache.beam.sdk.Pipeline.applyInternal(Pipeline.java:514) 
at org.apache.beam.sdk.Pipeline.applyTransform(Pipeline.java:454)
at org.apache.beam.sdk.values.PCollection.apply(PCollection.java:284)
at org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$Write.expandTyped(BigQueryIO.java:1019)
at org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$Write.expand(BigQueryIO.java:972)
at org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO$Write.expand(BigQueryIO.java:659)
at org.apache.beam.sdk.Pipeline.applyInternal(Pipeline.java:514)
at org.apache.beam.sdk.Pipeline.applyTransform(Pipeline.java:473)
at org.apache.beam.sdk.values.PCollection.apply(PCollection.java:297)
Caused by: org.apache.beam.sdk.coders.CannotProvideCoderException: Unable to provide a Coder for com.google.api.services.bigquery.model.TableRow.
Building a Coder using a registered CoderProvider failed.
See suppressed exceptions for detailed failures.

As a workaround for the above error, setCoder is set, but the result does not change. 作为上述错误的解决方法,设置了setCoder,但结果没有改变。

In SDK 1.9.0 we were able to create executable Jar with maven. 在SDK 1.9.0中,我们能够使用maven创建可执行的Jar。

If Jar can be created, can you give me a hint of how to change pom file and how to run jar? 如果可以创建Jar,可以提示我如何更改pom文件以及如何运行jar?

Have you tried including the beam-runners-google-cloud-dataflow-java dependency? 您是否尝试过包含beam-runners-google-cloud-dataflow-java依赖项?

https://beam.apache.org/documentation/runners/dataflow/ https://beam.apache.org/documentation/runners/dataflow/

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM