[英]kafka multi-threaded consumer with manual commit offset: KafkaConsumer is not safe for multi-threaded access
我使用ArrayBlockingQueue將 Kafka 消費者與接收器分離:
操作過程中遇到了一個錯誤,困擾了我好幾天。 我不明白問題的哪一部分是錯誤的:
11:44:10.794 [pool-2-thread-1] ERROR com.alibaba.kafka.source.KafkaConsumerRunner - [pool-2-thread-1] ConcurrentModificationException: KafkaConsumer is not safe for multi-threaded access
java.util.ConcurrentModificationException: KafkaConsumer is not safe for multi-threaded access
at org.apache.kafka.clients.consumer.KafkaConsumer.acquire(KafkaConsumer.java:1824)
at org.apache.kafka.clients.consumer.KafkaConsumer.acquireAndEnsureOpen(KafkaConsumer.java:1808)
at org.apache.kafka.clients.consumer.KafkaConsumer.commitSync(KafkaConsumer.java:1255)
at com.alibaba.kafka.source.KafkaConsumerRunner$1.call(KafkaConsumerRunner.java:75)
at com.alibaba.kafka.source.KafkaConsumerRunner$1.call(KafkaConsumerRunner.java:71)
at com.alibaba.kafka.sink.Sink.run(Sink.java:25)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
源代碼:
隊列.java
public class Queues {
public static volatile BlockingQueue[] queues;
/**
* Create Multiple Queues.
* @param count The number of queues created.
* @param capacity The Capacity of each queue.
*/
public static void createQueues(final int count, final int capacity) {
Queues.queues = new BlockingQueue[count];
for (int i=0; i<count; ++i) {
Queues.queues[i] = new ArrayBlockingQueue(capacity, true);
}
}
}
記錄
@Builder
@Getter
public class Record {
private final String value;
private final Callable<Boolean> ackCallback;
}
水槽.java
public class Sink implements Runnable {
private final int queueId;
public Sink(int queueId) {
this.queueId = queueId;
}
@Override
public void run() {
while (true) {
try {
Record record = (Record) Queues.queues[this.queueId].take();
// (1) Handler: Write to database
Thread.sleep(10);
// (2) ACK: notify kafka consumer to commit offset manually
record.getAckCallback().call();
} catch (Exception e) {
e.printStackTrace();
System.exit(1);
}
}
}
}
KafkaConsumerRunner
@Slf4j
public class KafkaConsumerRunner implements Runnable {
private final String topic;
private final KafkaConsumer<String, String> consumer;
public KafkaConsumerRunner(String topic, Properties properties) {
this.topic = topic;
this.consumer = new KafkaConsumer<>(properties);
}
@Override
public void run() {
// offsets to commit
Map<TopicPartition, OffsetAndMetadata> offsetsToCommit = new HashMap<>();
// Subscribe topic
this.consumer.subscribe(Collections.singletonList(this.topic));
// Consume Kafka Message
while (true) {
try {
ConsumerRecords<String, String> consumerRecords = this.consumer.poll(10000L);
for (TopicPartition topicPartition : consumerRecords.partitions()) {
for (ConsumerRecord<String, String> consumerRecord : consumerRecords.records(topicPartition)) {
// (1) Restore [partition -> offset] Map
offsetsToCommit.put(topicPartition, new OffsetAndMetadata(consumerRecord.offset()));
// (2) Put into queue
int queueId = topicPartition.partition() % Queues.queues.length;
Queues.queues[queueId].put(Record.builder()
.value(consumerRecord.value())
.ackCallback(this.getAckCallback(offsetsToCommit))
.build());
}
}
} catch (ConcurrentModificationException | InterruptedException e) {
log.error("[{}] {}", Thread.currentThread().getName(), ExceptionUtils.getMessage(e), e);
System.exit(1);
}
}
}
private Callable<Boolean> getAckCallback(Map<TopicPartition, OffsetAndMetadata> offsets) {
return new AckCallback<Boolean>(this.consumer, new HashMap<>(offsets)) {
@Override
public Boolean call() throws Exception {
try {
this.getConsumer().commitSync(this.getOffsets());
return true;
} catch (Exception e) {
log.error(String.format("[%s] %s", Thread.currentThread().getName(), ExceptionUtils.getMessage(e)), e);
return false;
}
}
};
}
@Getter
@AllArgsConstructor
abstract class AckCallback<T> implements Callable<T> {
private final KafkaConsumer<String, String> consumer;
private final Map<TopicPartition, OffsetAndMetadata> offsets;
}
}
應用.java
public class Application {
private static final String TOPIC = "YEWEI_TOPIC";
private static final int QUEUE_COUNT = 1;
private static final int QUEUE_CAPACITY = 4;
private static void createQueues() {
Queues.createQueues(QUEUE_COUNT, QUEUE_CAPACITY);
}
private static void startupSource() {
if (null == System.getProperty("java.security.auth.login.config")) {
System.setProperty("java.security.auth.login.config", "jaas.conf");
}
Properties properties = new Properties();
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "ConsumerGroup1");
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "cdh1:9092,cdh2:9092,cdh3:9092");
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, org.apache.kafka.common.serialization.StringDeserializer.class);
properties.put(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 2);
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
properties.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_PLAINTEXT");
properties.put(SaslConfigs.SASL_MECHANISM, "PLAIN");
ExecutorService executorService = Executors.newFixedThreadPool(QUEUE_COUNT);
for (int queueId = 0; queueId < QUEUE_COUNT; ++queueId) {
executorService.execute(new KafkaConsumerRunner(TOPIC, properties));
}
}
private static void startupSinks() {
ExecutorService executorService = Executors.newFixedThreadPool(QUEUE_COUNT);
for (int queueId = 0; queueId < QUEUE_COUNT; ++queueId) {
executorService.execute(new Sink(queueId));
}
}
public static void main(String[] args) {
Application.createQueues();
Application.startupSource();
Application.startupSinks();
}
}
我想通了這個問題。 Kafka 消費者在自己的線程中運行,也被 Sink 線程回調。 KafkaConsumer 的poll
和commitSync
方法只能應用於一個線程。 請參閱org.apache.kafka.clients.consumer.KafkaConsumer#acquireAndEnsureOpen
。
改為: Sink回調不直接使用consumer
object,而是向LinkedTransferQueue發送ACK消息。 KafkaConsumerRunner 每次都會輪詢 LinkedTransferQueue 並批量發送 ACK
@Slf4j
public class KafkaConsumerRunner implements Runnable {
private final String topic;
private final BlockingQueue ackQueue;
private final KafkaConsumer<String, String> consumer;
public KafkaConsumerRunner(String topic, Properties properties) {
this.topic = topic;
this.ackQueue = new LinkedTransferQueue<Map<TopicPartition, OffsetAndMetadata>>();
this.consumer = new KafkaConsumer<>(properties);
}
@Override
public void run() {
// Subscribe topic
this.consumer.subscribe(Collections.singletonList(this.topic));
// Consume Kafka Message
while (true) {
while (!this.ackQueue.isEmpty()) {
try {
Map<TopicPartition, OffsetAndMetadata> offsets = (Map<TopicPartition, OffsetAndMetadata>) this.ackQueue.take();
this.consumer.commitSync(offsets);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
...
}
}
private Callable<Boolean> getAckCallback(Map<TopicPartition, OffsetAndMetadata> offsets) {
return new AckCallback<Boolean>(new HashMap<>(offsets)) {
@Override
public Boolean call() throws Exception {
try {
ackQueue.put(offsets);
return true;
} catch (Exception e) {
log.error(String.format("[%s] %s", Thread.currentThread().getName(), ExceptionUtils.getMessage(e)), e);
System.exit(1);
return false;
}
}
};
}
...
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.