简体   繁体   中英

Spring batch Partitioning with multiple steps in parallel?

I have implemented Spring Batch partitioning for a single step, where a master step delegates its work to several slave threads which then get executed in parallel, as shown in the following image (reference: Spring docs). [image: enter image description here] Now, what if I have multiple steps which are to be executed in parallel? How do I configure them in the batch configuration? My current configuration is:

 <!-- Restartable job whose single step is a partitioned master step. -->
 <batch:job id="myJob"
            restartable="true"
            job-repository="jobRepository">
     <batch:listeners>
         <batch:listener ref="myJoblistener"/>
     </batch:listeners>

     <!-- Master step: partitions "my-step" using my-step-partitioner and
          hands the partitions to my-partitioner-handler. -->
     <batch:step id="my-master-step">
         <batch:partition step="my-step"
                          partitioner="my-step-partitioner"
                          handler="my-partitioner-handler"/>
     </batch:step>
 </batch:job>

    <!-- Step referenced by the partition element of my-master-step; runs myTasklet. -->
    <batch:step id="my-step">
        <batch:tasklet ref="myTasklet"
                       transaction-manager="transactionManager"/>
        <batch:listeners>
            <batch:listener ref="myStepListener"/>
        </batch:listeners>
    </batch:step>

My architecture diagram should look like the following image: [image: enter image description here]

I am not even sure whether this is possible using Spring Batch. Any ideas, or am I in way over my head trying to implement it? Thank you.

You can try the following.

 <!-- Parent job: one partitioned master step that fans out "my-step". -->
 <batch:job id="myJob"
            restartable="true"
            job-repository="jobRepository">
     <batch:listeners>
         <batch:listener ref="myJoblistener"/>
     </batch:listeners>

     <batch:step id="my-master-step">
         <!-- Each partition produced by my-step-partitioner executes "my-step". -->
         <batch:partition step="my-step"
                          partitioner="my-step-partitioner"
                          handler="my-partitioner-handler"/>
     </batch:step>
 </batch:job>

    <!-- Partitioned step: instead of a tasklet it launches the job MyChildJob,
         so every partition runs all of that job's steps. -->
    <batch:step id="my-step">
        <batch:job ref="MyChildJob"
                   job-launcher="jobLauncher"
                   job-parameters-extractor="jobParametersExtractor"/>
        <batch:listeners>
            <batch:listener ref="myStepListener"/>
        </batch:listeners>
    </batch:step>

    <!-- Child job launched by "my-step": three tasklet steps chained with "next". -->
    <batch:job id="MyChildJob"
               restartable="false"
               xmlns="http://www.springframework.org/schema/batch">

        <batch:step id="MyChildStep1" next="MyChildStep2">
            <batch:tasklet ref="MyChildStep1Tasklet"
                           transaction-manager="transactionManager"/>
        </batch:step>

        <batch:step id="MyChildStep2" next="MyChildStep3">
            <batch:tasklet ref="MyChildStep2Tasklet"
                           transaction-manager="transactionManager"/>
        </batch:step>

        <!-- Last step: no "next" attribute. -->
        <batch:step id="MyChildStep3">
            <batch:tasklet ref="MyChildStep3Tasklet"
                           transaction-manager="transactionManager"/>
        </batch:step>

    </batch:job>

I had a similar requirement and solved it using the configuration below.

<!-- Job made of partitioned steps chained with "next": zStep runs first, then gStep.
     Each partitioned step declares its own partitioner and an inline chunk-oriented step. -->
<batch:job id="cycleJob">
        <!-- First partitioned step: partitions come from zPartitioner, items are read by zReader. -->
        <batch:step id="zStep" next="gStep">
            <batch:partition partitioner="zPartitioner">
                <batch:step>
                    <batch:tasklet throttle-limit="1">
                        <batch:chunk processor="itemProcessor" reader="zReader" writer="itemWriter" commit-interval="1">
                        </batch:chunk>
                    </batch:tasklet>
                </batch:step>
                <!-- grid-size is taken from the ${maxThreads} property placeholder. -->
                <batch:handler task-executor="taskExecutor" grid-size="${maxThreads}" />
            </batch:partition>
        </batch:step>
        <!-- Second partitioned step: same shape as zStep but with gPartitioner / gReader.
             NOTE(review): next="yStep" points to a step that is not defined in this snippet,
             presumably elided by the author; confirm it exists or drop the attribute.
             NOTE(review): parent="zStep" inherits from zStep; verify this is intended, since
             the partition is fully redeclared here. -->
        <batch:step id="gStep" parent="zStep" next="yStep">
            <batch:partition partitioner="gPartitioner">
                <batch:step>
                    <batch:tasklet throttle-limit="1">
                        <batch:chunk processor="itemProcessor" reader="gReader" writer="itemWriter" commit-interval="1">
                        </batch:chunk>
                    </batch:tasklet>
                </batch:step>
                <batch:handler task-executor="taskExecutor" grid-size="${maxThreads}" />
            </batch:partition>
        </batch:step>
</batch:job>

Late answer, but I finally found the solution I was originally looking for when coming here, using a flow instead of a child job. So I figured I should post it here as well.

    <!-- Job with one partitioned master step; each partition executes the whole "mainFlow". -->
    <job id="myJob">
        <step id="my-master-step">
            <partition partitioner="my-step-partitioner">
                <handler task-executor="my-partitioner-handler"/>
                <step>
                    <!-- The complete flow is run once per partition. -->
                    <flow parent="mainFlow"/>
                </step>
            </partition>
        </step>
    </job>
    
    <!-- Flow of sequential steps (two in this example), chained through "next". -->
    <flow id="mainFlow">
        <!-- First step; a chunk could be used here instead of a tasklet. -->
        <step id="MyChildStep1" next="MyChildStep2">
            <tasklet ref="MyChildStep1Tasklet"/>
        </step>
        <!-- Second and final step; again a tasklet or a chunk. -->
        <step id="MyChildStep2">
            <tasklet ref="MyChildStep2Tasklet"/>
        </step>
    </flow>
    
    <!-- Tasklet beans referenced by the steps of "mainFlow"; bean ids must be unique. -->
    <bean id="MyChildStep1Tasklet" class="..." />

    <!-- Was declared a second time as "MyChildStep1Tasklet", leaving the
         MyChildStep2Tasklet reference of step MyChildStep2 unresolved. -->
    <bean id="MyChildStep2Tasklet" class="..." />

I have not tested running it in parallel but I see no reason why it shouldn't work.

As @arunkumar-pushparaj asked, here you can find a simple example using Java configuration:

/**
 * Example Spring Batch Java configuration in which a partitioned master step
 * ({@code step1}) runs a flow step ({@code stepFlow}) once per partition.
 *
 * <p>The flow currently contains a single tasklet step ({@code step2}) that logs
 * the partition number found in its step execution context.
 *
 * <p>NOTE(review): the snippet shows no class-level annotations and no declaration
 * of {@code log}; presumably the class is annotated as a Spring configuration with
 * batch processing enabled and a logger is supplied elsewhere (e.g. by an
 * annotation processor) -- confirm against the real file.
 */
public class ParallelFlowConfiguration {

    /** Number of partitions the partition handler requests from the partitioner. */
    protected static final int GRID_SIZE = 4;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    /**
     * Builds the job {@code partitionedJob}, whose only step is the partitioned {@link #step1}.
     *
     * @return the job definition
     */
    @Bean
    public Job producerProcess() {
        // NOTE(review): the null arguments are placeholders; this only works if Spring
        // intercepts the call and returns the managed step1 bean -- verify the class is
        // processed as a full configuration class.
        return this.jobBuilderFactory.get("partitionedJob")
                .incrementer(new RunIdIncrementer())
                .start(step1(null, null))
                .build();
    }

    /**
     * Master step: partitions {@link #myStepFlow()} with the given partitioner and
     * delegates execution of the partitions to the given handler.
     *
     * @param partitionHandler handler that executes the partitions
     * @param partitioner      creates one execution context per partition
     * @return the partitioned master step
     */
    @Bean
    public Step step1(PartitionHandler partitionHandler, Partitioner partitioner) {
        Step workerStep = myStepFlow();
        return this.stepBuilderFactory.get("step1")
                .partitioner(workerStep.getName(), partitioner)
                .step(workerStep)
                .partitionHandler(partitionHandler)
                .build();
    }

    /**
     * Tasklet step that reads {@code partitionNumber} from the step execution context
     * and logs it.
     *
     * @return the tasklet step
     */
    @Bean
    public Step step2() {
        return this.stepBuilderFactory.get("step2").tasklet((contribution, chunkContext) -> {
            ExecutionContext executionContext = chunkContext.getStepContext()
                    .getStepExecution()
                    .getExecutionContext();

            int partitionNumber = executionContext.getInt("partitionNumber");

            log.info("Eseguo Step DUE: {}", partitionNumber);
            return RepeatStatus.FINISHED;
        }).build();
    }

    /**
     * Partitioner that creates {@code gridSize} execution contexts named
     * {@code partition0..partition(gridSize-1)}, each carrying its index under the
     * key {@code partitionNumber}.
     *
     * <p>The requested grid size is honoured instead of a hard-coded constant; the
     * value actually requested is configured in {@link #partitionHandler()}.
     *
     * @return the partitioner
     */
    @Bean
    public Partitioner partitioner() {
        return new Partitioner() {
            @Override
            public Map<String, ExecutionContext> partition(int gridSize) {
                Map<String, ExecutionContext> partitions = new HashMap<>();

                for (int i = 0; i < gridSize; i++) {
                    ExecutionContext context = new ExecutionContext();
                    // putInt mirrors the getInt call in step2.
                    context.putInt("partitionNumber", i);
                    partitions.put("partition" + i, context);
                }

                return partitions;
            }
        };
    }

    /**
     * Flow executed for every partition; it starts with {@link #step2()}.
     *
     * @return the flow {@code myFlow1}
     */
    @Bean
    public Flow myFlow() {
        return new FlowBuilder<Flow>("myFlow1")
                .start(step2())
                .build();
    }

    /**
     * Step named {@code stepFlow} that wraps {@link #myFlow()} so the flow can be
     * used as the partitioned step.
     *
     * @return the flow step
     */
    @Bean
    public Step myStepFlow() {
        return stepBuilderFactory.get("stepFlow")
                .flow(myFlow())
                .build();
    }

    /**
     * Partition handler that runs {@link #myStepFlow()} for each partition on a
     * {@code SimpleAsyncTaskExecutor} and requests {@link #GRID_SIZE} partitions.
     *
     * @return the partition handler
     */
    @Bean
    public PartitionHandler partitionHandler() {
        TaskExecutorPartitionHandler handler = new TaskExecutorPartitionHandler();
        handler.setTaskExecutor(new SimpleAsyncTaskExecutor());
        handler.setStep(myStepFlow());
        // Without this the handler's own default grid size is passed to the partitioner.
        handler.setGridSize(GRID_SIZE);
        return handler;
    }

}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM