
Kafka multi-threaded consumer with manual offset commit: KafkaConsumer is not safe for multi-threaded access

I use an ArrayBlockingQueue to decouple the Kafka consumers from the sinks:

  1. Consume Kafka from multiple threads, with one KafkaConsumer per thread;
  2. Each Kafka consumer manages its offsets manually;
  3. The Kafka consumer wraps each message value, together with a callback that commits its offset, into a Record object and puts it onto an ArrayBlockingQueue;
  4. A Sink takes records from the ArrayBlockingQueue and processes them. Only after the Sink has processed a record successfully does it invoke the record's callback (notifying the Kafka consumer to commitSync).

While running this, I hit an error that troubled me for several days, and I couldn't figure out which part was wrong:

11:44:10.794 [pool-2-thread-1] ERROR com.alibaba.kafka.source.KafkaConsumerRunner - [pool-2-thread-1] ConcurrentModificationException: KafkaConsumer is not safe for multi-threaded access
java.util.ConcurrentModificationException: KafkaConsumer is not safe for multi-threaded access
    at org.apache.kafka.clients.consumer.KafkaConsumer.acquire(KafkaConsumer.java:1824)
    at org.apache.kafka.clients.consumer.KafkaConsumer.acquireAndEnsureOpen(KafkaConsumer.java:1808)
    at org.apache.kafka.clients.consumer.KafkaConsumer.commitSync(KafkaConsumer.java:1255)
    at com.alibaba.kafka.source.KafkaConsumerRunner$1.call(KafkaConsumerRunner.java:75)
    at com.alibaba.kafka.source.KafkaConsumerRunner$1.call(KafkaConsumerRunner.java:71)
    at com.alibaba.kafka.sink.Sink.run(Sink.java:25)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Source Code:

Queues.java

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class Queues {
    public static volatile BlockingQueue[] queues;

    /**
     * Create multiple queues.
     * @param count    The number of queues to create.
     * @param capacity The capacity of each queue.
     */
    public static void createQueues(final int count, final int capacity) {
        Queues.queues = new BlockingQueue[count];
        for (int i = 0; i < count; ++i) {
            // Fair queue: blocked producer/consumer threads are served in FIFO order
            Queues.queues[i] = new ArrayBlockingQueue<>(capacity, true);
        }
    }
}

Record.java

import java.util.concurrent.Callable;
import lombok.Builder;
import lombok.Getter;

@Builder
@Getter
public class Record {
    private final String value;
    // Invoked by the Sink after successful processing; commits this record's offset
    private final Callable<Boolean> ackCallback;
}

Sink.java

public class Sink implements Runnable {
    private final int queueId;

    public Sink(int queueId) {
        this.queueId = queueId;
    }

    @Override
    public void run() {
        while (true) {
            try {
                Record record = (Record) Queues.queues[this.queueId].take();
                // (1) Handler: Write to database
                Thread.sleep(10);
                // (2) ACK: notify kafka consumer to commit offset manually
                record.getAckCallback().call();
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }
    }
}

KafkaConsumerRunner.java

@Slf4j
public class KafkaConsumerRunner implements Runnable {
    private final String topic;
    private final KafkaConsumer<String, String> consumer;

    public KafkaConsumerRunner(String topic, Properties properties) {
        this.topic = topic;
        this.consumer = new KafkaConsumer<>(properties);
    }

    @Override
    public void run() {
        // offsets to commit
        Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = new HashMap<>();
        // Subscribe topic
        this.consumer.subscribe(Collections.singletonList(this.topic));
        // Consume Kafka Message
        while (true) {
            try {
                ConsumerRecords<String, String> consumerRecords = this.consumer.poll(10000L);
                for (TopicPartition topicPartition : consumerRecords.partitions()) {
                    for (ConsumerRecord<String, String> consumerRecord : consumerRecords.records(topicPartition)) {
                        // (1) Record the [partition -> offset] to commit
                        // (note: commitSync expects the offset of the next record to consume, i.e. offset() + 1)
                        offsetsToCommit.put(topicPartition, new OffsetAndMetadata(consumerRecord.offset()));
                        // (2) Put into queue
                        int queueId = topicPartition.partition() % Queues.queues.length;
                        Queues.queues[queueId].put(Record.builder()
                                .value(consumerRecord.value())
                                .ackCallback(this.getAckCallback(offsetsToCommit))
                                .build());
                    }
                }
            } catch (ConcurrentModificationException | InterruptedException e) {
                log.error("[{}] {}", Thread.currentThread().getName(), ExceptionUtils.getMessage(e), e);
                System.exit(1);
            }
        }
    }

    private Callable<Boolean> getAckCallback(Map<TopicPartition, OffsetAndMetadata> offsets) {
        return new AckCallback<Boolean>(this.consumer, new HashMap<>(offsets)) {
            @Override
            public Boolean call() throws Exception {
                try {
                    this.getConsumer().commitSync(this.getOffsets());
                    return true;
                } catch (Exception e) {
                    log.error(String.format("[%s] %s", Thread.currentThread().getName(), ExceptionUtils.getMessage(e)), e);
                    return false;
                }
            }
        };
    }

    @Getter
    @AllArgsConstructor
    abstract class AckCallback<T> implements Callable<T> {
        private final KafkaConsumer<String, String> consumer;
        private final Map<TopicPartition, OffsetAndMetadata> offsets;
    }
}

Application.java

public class Application {
    private static final String TOPIC = "YEWEI_TOPIC";
    private static final int QUEUE_COUNT = 1;
    private static final int QUEUE_CAPACITY = 4;
    
    private static void createQueues() {
        Queues.createQueues(QUEUE_COUNT, QUEUE_CAPACITY);
    }

    private static void startupSource() {
        if (null == System.getProperty("java.security.auth.login.config")) {
            System.setProperty("java.security.auth.login.config", "jaas.conf");
        }

        Properties properties = new Properties();
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "ConsumerGroup1");
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "cdh1:9092,cdh2:9092,cdh3:9092");
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
        properties.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 2);
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_PLAINTEXT");
        properties.put(SaslConfigs.SASL_MECHANISM, "PLAIN");

        ExecutorService executorService = Executors.newFixedThreadPool(QUEUE_COUNT);
        for (int queueId = 0; queueId < QUEUE_COUNT; ++queueId) {
            executorService.execute(new KafkaConsumerRunner(TOPIC, properties));
        }
    }

    private static void startupSinks() {
        ExecutorService executorService = Executors.newFixedThreadPool(QUEUE_COUNT);
        for (int queueId = 0; queueId < QUEUE_COUNT; ++queueId) {
            executorService.execute(new Sink(queueId));
        }
    }

    public static void main(String[] args) {
        Application.createQueues();
        Application.startupSource();
        Application.startupSinks();
    }
}

I figured out the problem. The Kafka consumer runs in its own thread, but its commitSync was also being invoked from the Sink thread via the callback. KafkaConsumer's poll and commitSync methods may only be called from a single thread; see org.apache.kafka.clients.consumer.KafkaConsumer#acquireAndEnsureOpen.
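
For context, the check that throws this exception is a lightweight ownership test rather than a real lock. Paraphrased (simplified) from the Kafka client source:

private static final long NO_CURRENT_THREAD = -1L;
private final AtomicLong currentThread = new AtomicLong(NO_CURRENT_THREAD);
private final AtomicInteger refcount = new AtomicInteger(0);

// Called at the start of poll(), commitSync(), etc.: if a different thread
// already owns the consumer, the CAS fails and the exception is thrown.
private void acquire() {
    long threadId = Thread.currentThread().getId();
    if (threadId != currentThread.get() && !currentThread.compareAndSet(NO_CURRENT_THREAD, threadId))
        throw new ConcurrentModificationException("KafkaConsumer is not safe for multi-threaded access");
    refcount.incrementAndGet();
}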

The fix: the Sink callback no longer uses the consumer object directly; instead it puts the ACK (the offsets to commit) onto a LinkedTransferQueue owned by the consumer thread. On every iteration of its poll loop, KafkaConsumerRunner first drains the LinkedTransferQueue and commits the pending ACKs (a batched variant is sketched after the code):

@Slf4j
public class KafkaConsumerRunner implements Runnable {
    private final String topic;
    private final BlockingQueue<Map<TopicPartition, OffsetAndMetadata>> ackQueue;
    private final KafkaConsumer<String, String> consumer;

    public KafkaConsumerRunner(String topic, Properties properties) {
        this.topic = topic;
        this.ackQueue = new LinkedTransferQueue<>();
        this.consumer = new KafkaConsumer<>(properties);
    }

    @Override
    public void run() {
        // Subscribe topic
        this.consumer.subscribe(Collections.singletonList(this.topic));
        // Consume Kafka Message
        while (true) {
            // Drain ACKs queued by the Sink thread, then poll as usual
            while (!this.ackQueue.isEmpty()) {
                try {
                    Map<TopicPartition, OffsetAndMetadata> offsets = this.ackQueue.take();
                    this.consumer.commitSync(offsets);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }

            ...
        }
    }

    private Callable<Boolean> getAckCallback(Map<TopicPartition, OffsetAndMetadata> offsets) {
        return new AckCallback<Boolean>(new HashMap<>(offsets)) {
            @Override
            public Boolean call() throws Exception {
                try {
                    // Enqueue the offset snapshot taken at build time, not the live
                    // map that the poll loop keeps mutating
                    ackQueue.put(this.getOffsets());
                    return true;
                } catch (Exception e) {
                    log.error(String.format("[%s] %s", Thread.currentThread().getName(), ExceptionUtils.getMessage(e)), e);
                    System.exit(1);
                    return false;
                }
            }
        };
    }

    ...
}
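
As a possible refinement (my sketch, not part of the original fix): rather than committing each ACK map individually, the draining loop can merge everything pending into a single map, keeping only the highest offset per partition, and issue one commitSync per poll-loop iteration. This assumes the ackQueue field is typed BlockingQueue<Map<TopicPartition, OffsetAndMetadata>> as above; drainAndCommit is a hypothetical helper.

// Drain all pending ACKs and commit them in one batch.
private void drainAndCommit() {
    Map<TopicPartition, OffsetAndMetadata> batch = new HashMap<>();
    Map<TopicPartition, OffsetAndMetadata> pending;
    while ((pending = this.ackQueue.poll()) != null) {  // poll() never blocks
        pending.forEach((tp, om) ->
                batch.merge(tp, om, (a, b) -> a.offset() >= b.offset() ? a : b));
    }
    if (!batch.isEmpty()) {
        this.consumer.commitSync(batch);  // single batched commit
    }
}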
