
Spring Batch partitioning a step

I have multiple CSV files to read, and I want the processing to be done one file at a time rather than reading all the records until the commit interval is reached.

I have put together a job that uses partitioning, but when I run it I see two entries for every row, as if the job were running twice.

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:context="http://www.springframework.org/schema/context" xmlns:p="http://www.springframework.org/schema/p"
    xmlns:batch="http://www.springframework.org/schema/batch" xmlns:mvc="http://www.springframework.org/schema/mvc"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.springframework.org/schema/beans  
http://www.springframework.org/schema/beans/spring-beans-4.0.xsd  
http://www.springframework.org/schema/context  
http://www.springframework.org/schema/context/spring-context-4.0.xsd  
http://www.springframework.org/schema/mvc  
http://www.springframework.org/schema/mvc/spring-mvc-4.0.xsd  
http://www.springframework.org/schema/batch   
http://www.springframework.org/schema/batch/spring-batch-2.2.xsd">

    <import resource="classpath:/database.xml" />



     <bean id="asyncTaskExecutor" class="org.springframework.core.task.SimpleAsyncTaskExecutor" >
     <property name="concurrencyLimit" value="1"></property>
     </bean>  

     <bean id="taskExecutor" class="org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor">
        <property name="corePoolSize" value="5" />
    </bean>

     <bean id="partitioner" class="org.springframework.batch.core.partition.support.MultiResourcePartitioner" scope="step">
        <property name="resources" value="file:#{jobParameters[filePath]}/*.dat" />
    </bean>

    <bean id="multiResourceReader"
        class="org.springframework.batch.item.file.MultiResourceItemReader"
        scope="step">
        <property name="resources" value="file:#{jobParameters[filePath]}/*.dat"></property>
        <property name="delegate" ref="logItFileReader"></property>
    </bean>



    <batch:job id="remediationJob">
        <batch:step id="partitionedStep" >
            <batch:partition step="readWriteContactsPartitionedStep" partitioner="partitioner">
                <batch:handler task-executor="asyncTaskExecutor" />
            </batch:partition>
        </batch:step>
    </batch:job>

    <batch:step id="readWriteContactsPartitionedStep">
        <batch:tasklet>
            <batch:transaction-attributes isolation="READ_UNCOMMITTED"/>
            <batch:chunk reader="multiResourceReader" writer="rawItemDatabaseWriter" commit-interval="10" skip-policy="pdwUploadSkipPolicy"/>
            <batch:listeners>
                <batch:listener ref="customItemReaderListener"/>
                <batch:listener ref="csvLineSkipListener"/>
                <batch:listener ref="getCurrentResourceChunkListener"/>
            </batch:listeners>
        </batch:tasklet>
    </batch:step>


    <bean id="logItFileReader" class="org.springframework.batch.item.file.FlatFileItemReader" scope="step">
        <!-- Read a csv file -->

        <property name="strict" value="false"></property>
        <property name="lineMapper">
            <bean class="org.springframework.batch.item.file.mapping.DefaultLineMapper">
                <!-- split it -->
                <property name="lineTokenizer">
                    <bean
                        class="org.springframework.batch.item.file.transform.DelimitedLineTokenizer">
                        <property name="delimiter" value="@##@" />
                        <property name="strict" value="true" />
                    </bean>
                </property>
                <property name="fieldSetMapper">
                    <!-- map to an object -->
                    <bean class="org.kp.oppr.remediation.batch.vo.CSVDataVOFieldMapper">

                    </bean>
                </property>
            </bean>
        </property>
    </bean>

    <bean id="rawItemDatabaseWriter" class="org.kp.oppr.remediation.batch.csv.RawItemDatabaseWriter"
        scope="step">
    </bean>

    <bean id="pdwUploadSkipPolicy"
        class="org.springframework.batch.core.step.skip.AlwaysSkipItemSkipPolicy" />

    <bean id="csvDataVO" class="org.kp.oppr.remediation.batch.vo.CSVDataVO"
        scope="prototype"></bean>


    <!-- BATCH LISTENERS -->

    <bean id="pdwFileMoverListener"
        class="org.kp.oppr.remediation.batch.listener.PdwFileMoverListener"
        scope="step">
    </bean>

    <bean id="csvLineSkipListener"
        class="org.kp.oppr.remediation.batch.listener.CSVLineSkipListener"
        scope="step">
    </bean>

    <bean id="customItemReaderListener"
        class="org.kp.oppr.remediation.batch.listener.CustomItemReaderListener"></bean>

     <bean id="getCurrentResourceChunkListener" 
          class="org.kp.oppr.remediation.batch.listener.GetCurrentResourceChunkListener">
        <property name="proxy" ref ="multiResourceReader" />
    </bean>
    <!-- 
    <bean id="stepListener" class="org.kp.oppr.remediation.batch.listener.ExampleStepExecutionListener">
        <property name="resources" ref="multiResourceReader"/>
    </bean>
     -->
    <!-- Skip Policies -->

</beans>  

Is there something I am missing here?

Well, you have two questions there:

1 - "I want the processing to be done one file at a time, rather than reading all the records till it reaches commit level." Set commit-interval to 1: the step will read an item, process it, and the writer will wait until it has one item to write.
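With the configuration from the question, that would mean changing the chunk definition to, for example:

    <batch:chunk reader="multiResourceReader" writer="rawItemDatabaseWriter" commit-interval="1" skip-policy="pdwUploadSkipPolicy"/>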

2 - "As if the job is running twice."

It looks like it will run as many times as the number of files you have.

You should not use a MultiResourceItemReader for this step. The partitioner splits the resources across multiple partitions and creates a separate execution context for each. Because the resources property is also set on the MultiResourceItemReader, it considers all the files again.
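For reference, here is a minimal sketch of how the partitioned step could be wired so that each partition reads only its own file. It assumes the MultiResourcePartitioner's default key name, fileName, and reuses the logItFileReader bean from the question with its lineMapper unchanged:

    <batch:step id="readWriteContactsPartitionedStep">
        <batch:tasklet>
            <batch:transaction-attributes isolation="READ_UNCOMMITTED"/>
            <!-- point the chunk directly at the flat-file reader instead of the MultiResourceItemReader -->
            <batch:chunk reader="logItFileReader" writer="rawItemDatabaseWriter" commit-interval="10" skip-policy="pdwUploadSkipPolicy"/>
        </batch:tasklet>
    </batch:step>

    <bean id="logItFileReader" class="org.springframework.batch.item.file.FlatFileItemReader" scope="step">
        <!-- each partition reads only the single file the partitioner placed in its step execution context -->
        <property name="resource" value="#{stepExecutionContext['fileName']}"/>
        <property name="strict" value="false"/>
        <!-- lineMapper configured exactly as in the original logItFileReader bean -->
    </bean>

With this wiring the multiResourceReader bean (and its resources property) is no longer needed; the partitioner alone decides which file each partition sees.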
