[英]Publishing multiple messages simultaneously to Kafka partitions using multithreading for testing purposes to check the performance
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.StringSerializer;
import java.util.Properties;
class MultithreadingDemo extends Thread
{
public void run()
{
Properties props = new Properties();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "xxx:443");
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
io.confluent.kafka.serializers.KafkaAvroSerializer.class);
props.put("schema.registry.url", "xxx");
props.put(ProducerConfig.ACKS_CONFIG, "all");
props.put("security.protocol", "SSL");
props.put("ssl.truststore.location", "xxx");
props.put("ssl.truststore.password", "xxx");
props.put("ssl.keystore.location", "xxx");
props.put("ssl.keystore.password", "xxx");
props.put("ssl.key.password", "xxx");
KafkaProducer producer = new KafkaProducer(props);
String userSchema = "{ \"name\": \"MyClass\", \"type\": \"record\", \"namespace\":
\"com.oop.hts\", \"fields\": [ { \"name\": \"appId\", \"type\":
\"string\" }, { \"name\": \"appName\", \"type\": \"string\" }, {
\"name\": \"groups\", \"type\": \"string\" }, { \"name\": \"subGroups\",
\"type\": \"string\" }, { \"name\": \"jobType\", \"type\": \"string\"
}, { \"name\": \"appStartTime\", \"type\": \"string\" }, {
\"name\": \"appEndTime\", \"type\": \"string\" }, { \"name\":
\"appDuration\", \"type\": \"int\" }, { \"name\": \"cpuTime\",
\"type\": \"int\" }, { \"name\": \"runTime\", \"type\": \"int\" },
{ \"name\": \"memoryUsage\", \"type\": \"int\" }, { \"name\":
\"appStatus\", \"type\": \"string\" }, { \"name\": \"appResult\",
\"type\": \"string\" }, { \"name\": \"failureREason\", \"type\":
\"string\" }, { \"name\": \"recordCount\", \"type\": \"string\" },
{ \"name\": \"numexecutors\", \"type\": \"string\" }, { \"name\":
\"executorcores\", \"type\": \"string\" }, { \"name\":
\"executormemory\", \"type\": \"string\" } ] }\n" +
"";
System.out.println("schema:" + userSchema);
Schema.Parser parser = new Schema.Parser();
Schema schema = parser.parse(userSchema);
GenericRecord avroRecord = new GenericData.Record(schema);
//avroRecord.put("f1", "value777");
System.out.println("----" + avroRecord);
avroRecord.put("appId","spark-d0731a81f1b64f109c5d985c1b2e0011");
avroRecord.put("appName","H@S-UCR");
avroRecord.put("groups","");
avroRecord.put("subGroups","");
avroRecord.put("jobType","");
avroRecord.put("appStartTime","2020-04-13T10:02:25.902");
avroRecord.put("appEndTime","2020-04-13T10:02:25.902");
avroRecord.put("appDuration",4110);
avroRecord.put("cpuTime",337468);
avroRecord.put("runTime",1198987);
avroRecord.put("memoryUsage",234933352);
avroRecord.put("appStatus","Running");
avroRecord.put("appResult","InProgress");
avroRecord.put("failureREason","");
avroRecord.put("recordCount","0");
avroRecord.put("numexecutors","25");
avroRecord.put("executorcores","15");
avroRecord.put("executormemory","60g");
System.out.println("----"+ avroRecord);
ProducerRecord<String, GenericRecord> record = new ProducerRecord<String,
GenericRecord>("kaas.topic", avroRecord);
try {
producer.send(record);
System.out.println("Successfully produced the records to the Kafka topic :
kaas.dqhats.target ");
} catch(SerializationException e) {
System.out.println("An Exception occured" + e.getMessage());
e.printStackTrace();
}
}
}
// Main Class
public class Multithread
{
public static void main(String[] args)
{
int n = 8; // Number of threads
for (int i=0; i<n; i++)
{
MultithreadingDemo object = new MultithreadingDemo();
object.start();
}
}
}
我想使用多线程向 kafka 分区生成多条消息。(这是检查 kafka 主题/分区性能/容量所必需的)
使用以下代码,我无法并行向 kafka 分区生成消息。
寻求帮助。
使用多线程将多条消息同时发布到 Kafka 分区,以测试并检查性能
有人可以帮助我使用多线程同时将多条消息发布到 Kafka 分区吗?
send()
方法只会将消息放在缓冲区中,并且消息将作为单独线程的一部分发送。 本质上,这就是所展示的生产者的异步性质。
此外,在调用send()
方法之后 - 此调用返回的Future
对象将被忽略,因此您实际上无法知道您的消息是否已发送。
您可以尝试:
producer.send(record).get();
这将在继续之前等待 Kafka 的响应,如果将该消息发送到 Kafka 时出现任何问题,您将收到错误消息。
或者
send()
之后调用flush()
方法。顾名思义,此方法将刷新缓冲区中的消息,但如果您想了解更多信息,请参阅此处的参考文档。
希望这可以帮助!
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.