简体   繁体   中英

how to partition steps in spring-batch?

I am learning Spring Batch and wrote a simple application to play with it. According to my requirements, I read from a single CSV file, do some transformation, and insert into a database.

I have the following configuration:

    @Bean
    public Step step1(JdbcBatchItemWriter<Person> writer) {
        // Chunk-oriented step: reads Person items from the CSV, transforms
        // them, and writes them to the database in commits of 10 items each.
        Step csvToDbStep =
                stepBuilderFactory.get("step1")
                        .<Person, Person>chunk(10)
                        .reader(reader())
                        .processor(processor())
                        .writer(writer)
                        .build();
        return csvToDbStep;
    }

   @Bean
    public Job importUserJob(JobCompletionNotificationListener listener, Step step1, Step step2) {
        // Extra listener pulled out into a named local for readability; it
        // only prints console markers before and after the job runs.
        JobExecutionListener consoleMarkerListener = new JobExecutionListener() {
            @Override
            public void beforeJob(JobExecution jobExecution) {
                System.out.println("!!!!!!!!!!!!!SECOND_LISTENER_BEFORE!!!!!!!!!!!!!!!!");
            }

            @Override
            public void afterJob(JobExecution jobExecution) {
                System.out.println("!!!!!!!!!!!!!SECOND_LISTENER_AFTER!!!!!!!!!!!!!!!!");
            }
        };

        // step1 then step2, run sequentially as a flow.
        return jobBuilderFactory.get("importUserJob")
                .incrementer(new RunIdIncrementer())
                .listener(listener)
                .listener(consoleMarkerListener)
                .flow(step1)
                .next(step2)
                .end()
                .build();
    }

/**
 * Builds the CSV reader for Person records.
 *
 * Fixes over the original: the raw {@code FlatFileItemReader} /
 * {@code FlatFileItemReaderBuilder} types are parameterized with
 * {@code Person} (raw types lose compile-time safety), and the
 * double-brace-initialization idiom is replaced with an explicit mapper
 * instance (double-brace creates a needless anonymous subclass).
 */
public FlatFileItemReader<Person> reader() {
    BeanWrapperFieldSetMapper<Person> fieldSetMapper = new BeanWrapperFieldSetMapper<>();
    fieldSetMapper.setTargetType(Person.class);
    return new FlatFileItemReaderBuilder<Person>()
            .name("csvPersonReader")
            .resource(csvResource)
            .delimited()
            .names(new String[]{"firstName", "lastName"})
            .fieldSetMapper(fieldSetMapper)
            .build();
}

Now I want to make that step execute using 10 threads. As far as I understand, I need to use the partitioning feature for that. I've found several examples of it, but they contain XML configuration. I prefer to use Java configuration.

How can I achieve it ?

PS

I tried the following approach:

@Bean
public Step step1(JdbcBatchItemWriter<Person> writer) {
    // NOTE(review): adding a taskExecutor to a chunk step gives multi-threaded
    // chunk processing, NOT partitioning. FlatFileItemReader is stateful and
    // presumably not safe for concurrent use here — likely why this hangs.
    // Also note the executor is never initialized (no afterPropertiesSet()/
    // initialize() call) — TODO confirm, that alone can stall task submission.
    ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
    taskExecutor.setCorePoolSize(1); // a single core thread defeats the goal of 10 threads
    TaskletStep step1 = stepBuilderFactory.get("step1")
            .<Person, Person>chunk(10)
            .reader(reader())
            .processor(processor())
            .writer(writer)
            .taskExecutor(taskExecutor)
            .build();

    return step1;
}

But my application hangs. Moreover, it doesn't do partitioning, and it will work only on a single PC.

Your configuration is wrong. Follow the configuration below. You need to decide the logic on which you want to partition. Look at the partitioner's partition method to see how it creates a map and adds entries to the ExecutionContext.

Follow the code below:

@Bean
public Step step1(JdbcBatchItemWriter<Person> writer) {
    // Master (partitioner) step: fans slaveStep out across the partitions
    // produced by partitioner(), each running on a taskExecutor() thread.
    // The original snippet assigned to a local and had NO return statement,
    // so it would not compile — fixed by returning the built step directly.
    return stepBuilderFactory.get("partionerStep")
            .partitioner("slaveStep", partitioner())
            .step(slaveStep())
            .taskExecutor(taskExecutor())
            .build();
}

@Bean
public CustomPartitioner partitioner() {
    // Single shared partitioner bean used by the master step.
    return new CustomPartitioner();
}

public class CustomPartitioner implements Partitioner {

    /**
     * Builds one ExecutionContext per resource; each map entry becomes one
     * slave-step execution. Put whatever the slave reader needs (file name,
     * id range, ...) into the context under well-known keys.
     *
     * NOTE(review): 'resources' must be a field of the enclosing class — it is
     * not visible in this snippet; verify it is injected. The unused counter
     * 'k' from the original has been removed.
     */
    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        Map<String, ExecutionContext> map = new HashMap<>(gridSize);
        int i = 0;
        for (Resource resource : resources) {
            ExecutionContext context = new ExecutionContext();
            context.putString("keyName", ""); // Depends on what logic you want to use to split
            map.put("PARTITION_KEY" + i, context);
            i++;
        }
        return map;
    }
}

@Bean
public TaskExecutor taskExecutor() {
    // Fixed-size pool (10 core / 10 max) with a bounded queue of 10;
    // initialized eagerly since it is built outside the container lifecycle.
    ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
    executor.setCorePoolSize(10);
    executor.setMaxPoolSize(10);
    executor.setQueueCapacity(10);
    executor.afterPropertiesSet();
    return executor;
}

@Bean
public Step slaveStep()
        throws UnexpectedInputException, MalformedURLException, ParseException {
    // Worker step: executed once per partition entry, in chunks of 10.
    // Consistency fix: the original used an undeclared 'steps' factory while
    // every sibling bean uses 'stepBuilderFactory' — unified on the latter.
    // NOTE(review): 'writer' must be an injected field of the enclosing
    // class; it is not declared in this snippet — verify.
    return stepBuilderFactory.get("slaveStep")
            .<Person, Person>chunk(10)
            .reader(reader())
            .processor(processor())
            .writer(writer)
            .build();
}

@Bean
    public Job importUserJob(JobCompletionNotificationListener listener, Step step1, Step step2) {
        return jobBuilderFactory.get("importUserJob")
                .incrementer(new RunIdIncrementer())
                .listener(listener)
                .listener(new JobExecutionListener() {
                    @Override
                    public void beforeJob(JobExecution jobExecution) {
                        System.out.println("!!!!!!!!!!!!!SECOND_LISTENER_BEFORE!!!!!!!!!!!!!!!!");
                    }

                    @Override
                    public void afterJob(JobExecution jobExecution) {
                        System.out.println("!!!!!!!!!!!!!SECOND_LISTENER_AFTER!!!!!!!!!!!!!!!!");

                    }
                })
                .flow(step1)
                .next(step2)
                .end()
                .build();
    }

    return step1;
}

You can use the code below to implement batch partitioning.

/**
 * Spring Batch configuration demonstrating a partitioned step: a master step
 * splits the CUSTOMER id space via DemoPartitioner and runs a step-scoped
 * JDBC reader per partition on a thread pool.
 */
@Configuration
public class DemoJobBatchConfiguration {

    private static final Logger LOGGER = LoggerFactory.getLogger(DemoJobBatchConfiguration.class);

    @Autowired
    private JobBuilderFactory jobBuilderFactory;

    @Autowired
    private StepBuilderFactory stepBuilderFactory;

    // Qualifier must match the bean name declared elsewhere (spelled
    // "applicaionDS" there, so the typo is kept intentionally).
    @Autowired
    @Qualifier("applicaionDS")
    public DataSource dataSource;

    @Autowired
    UserWritter userWriter;

    /** Job with a single partitioned step; RunIdIncrementer allows re-runs. */
    @Bean("demoJob")
    public Job partitionJob(JobNotificationListener listener, JobBuilderFactory jobBuilderFactory,
            @Qualifier("demoPartitionStep") Step demoPartitionStep) {
        return jobBuilderFactory.get("demoJob").incrementer(new RunIdIncrementer()).listener(listener)
                .start(demoPartitionStep).build();
    }

    /** Master step: fans demoSlaveStep out over gridSize (21) partitions. */
    @Bean(name = "demoPartitionStep")
    public Step demoPartitionStep(Step demoSlaveStep, StepBuilderFactory stepBuilderFactory) {
        return stepBuilderFactory.get("demoPartitionStep").partitioner("demoPartitionStep", demoPartitioner())
                .gridSize(21).step(demoSlaveStep).taskExecutor(jobTaskExecutor()).build();
    }

    /** Splits the id space into fixed-size ranges; see DemoPartitioner. */
    @Bean(name = "demoPartitioner", destroyMethod = "")
    public Partitioner demoPartitioner() {
        return new DemoPartitioner();
    }

    /** Worker step: one execution per partition, committing in chunks of 3. */
    @Bean
    public Step demoSlaveStep(ItemReader<User> demoReader, ItemProcessor<User, User> demoJobProcessor) {
        return stepBuilderFactory.get("demoSlaveStep").<User, User>chunk(3).reader(demoReader)
                .processor(demoJobProcessor).writer(userWriter).build();
    }

    /**
     * Step-scoped reader: each partition gets its own instance, with the SQL
     * for its id range injected from the step ExecutionContext ("SQL" key put
     * there by the partitioner).
     */
    @Bean(name = "demoReader")
    @StepScope
    public JdbcCursorItemReader<User> demoReader(@Value("#{stepExecutionContext[SQL]}") String sql,
            @Value("#{jobParameters[JOB_PARM]}") String jobParm,
            @Value("#{jobExecutionContext[jobExecutionParameter]}") String jobExecutionParameter) {
        LOGGER.info("---------------------- demoReader ------------------------------- " + sql);
        LOGGER.info(" jobParm : " + jobParm);
        LOGGER.info(" jobExecutionParameter : " + jobExecutionParameter);

        JdbcCursorItemReader<User> reader = new JdbcCursorItemReader<>();
        reader.setDataSource(this.dataSource);
        reader.setFetchSize(200);
        reader.setRowMapper(new BeanPropertyRowMapper<>(User.class));
        reader.setSql(sql);
        return reader;
    }

    @Bean(name = "demoJobProcessor")
    @StepScope
    public ItemProcessor<User, User> demoJobProcessor() throws Exception {
        LOGGER.info(" DemoJobBatchConfiguration: demoJobProcessor  ");
        return new UserProcessor();
    }

    /** Thread pool sized for the 21 partitions (core 25 / max 30). */
    @Bean
    public TaskExecutor jobTaskExecutor() {
        ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
        taskExecutor.setCorePoolSize(25);
        taskExecutor.setMaxPoolSize(30);
        taskExecutor.afterPropertiesSet();
        return taskExecutor;
    }

}

/**
 * Partitions the CUSTOMER id space 1..gridSize into consecutive ranges of 3
 * ids. Each partition's ExecutionContext carries the range bounds, a ready-made
 * SQL statement under the "SQL" key, and a "name" label.
 */
public class DemoPartitioner implements Partitioner {

    @Override
    public Map<String, ExecutionContext> partition(int gridSize) {
        Map<String, ExecutionContext> result = new HashMap<String, ExecutionContext>();

        final int range = 3;
        int lower = 1;      // first id of the current partition
        int upper = range;  // last id of the current partition (may exceed gridSize)

        while (lower <= gridSize) {
            ExecutionContext executionContext = new ExecutionContext();
            String sql = "SELECT * FROM CUSTOMER WHERE ID BETWEEN " + lower + " AND " + upper;
            System.out.println("SQL : " + sql);
            executionContext.putInt("fromId", lower);
            executionContext.putInt("toId", upper);
            executionContext.putString("SQL", sql);
            executionContext.putString("name", "Thread" + lower);
            result.put("partition" + lower, executionContext);
            lower = upper + 1;
            upper += range;
        }
        return result;
    }

}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM