[英]How to resolve a Storm Kafka spout consuming only half of the data from Kafka?
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>1.0.2</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.10.0.0</version>
<scope>compile</scope>
</dependency>
I use the storm-kafka-client Storm Kafka Spout with the new Kafka Consumer API from apache/storm/external/storm-kafka-client. 我在apache / storm / external / storm-kafka-client中将Storm Kafka Spout的 storm-kafka-client 与New Kafka Consumer API一起使用 。 My topology looks like this:
我的拓扑如下所示:
public class AnalyseTopo {
private static final Logger LOG = LoggerFactory.getLogger(AnalyseTopo.class);
private static final String[] STREAMS = new String[]{"test_stream"};
private static final String[] TOPICS = new String[]{"online"};
public static void main(String[] args) throws Exception {
new AnalyseTopo().runMain(args);
}
protected void runMain(String[] args) throws Exception {
if (args.length == 0) {
submitTopologyLocalCluster(getTopologyKafkaSpout(), getConfig());
} else {
submitTopologyRemoteCluster(args[0], getTopologyKafkaSpout(), getConfig());
}
}
protected void submitTopologyLocalCluster(StormTopology topology, Config config) throws InterruptException {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("KafkaTest", config, topology);
stopWaitingForInput();
}
protected void submitTopologyRemoteCluster(String arg, StormTopology topology, Config config) throws Exception {
StormSubmitter.submitTopology(arg, config, topology);
}
protected void stopWaitingForInput() {
try {
System.out.println("PRESS ENTER TO STOP Now");
new BufferedReader(new InputStreamReader(System.in)).readLine();
System.exit(0);
} catch (IOException e) {
e.printStackTrace();
}
}
protected StormTopology getTopologyKafkaSpout() {
final TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("kafka_spout", new KafkaSpout<>(getKafkaSpoutConfig(getKafkaSpoutStreams())), 1);
// 1. 先用fastjson解析每条日志记录
builder.setBolt("json_parse", new JsonParseBolt()).shuffleGrouping("kafka_spout", STREAMS[0]);
// 2. 每隔60s去计算一下应用频道的统计量,固定窗口为时间增加60s
Duration oneMinite = new Duration(60, TimeUnit.SECONDS);// 60 -> 2
IWindowedBolt appChannelBolt = new AppChannelStatBolt()
.withTimestampField("timestamp")
.withLag(oneMinite)
.withWatermarkInterval(oneMinite)
.withTumblingWindow(oneMinite);
builder.setBolt("app_channel", appChannelBolt, 3)
.fieldsGrouping("json_parse", new Fields("timestamp")); //from app_channel change to timestamp
// 3. 将这些统计给到app整体统计,channel整体统计
IWindowedBolt appStatBolt = new AppStatBolt()
.withTimestampField("timestamp")
.withLag(oneMinite)
.withWatermarkInterval(oneMinite)
.withTumblingWindow(oneMinite);
builder.setBolt("app_stat", appStatBolt, 1)
.fieldsGrouping("app_channel", "stat", new Fields("appid"));
IWindowedBolt channelStatBolt = new ChannelStatBolt()
.withTimestampField("timestamp")
.withLag(oneMinite)
.withWatermarkInterval(oneMinite)
.withTumblingWindow(oneMinite);
builder.setBolt("channel_stat", channelStatBolt, 1)
.fieldsGrouping("app_channel", "stat", new Fields("channel"));
// 4. 写道mysql持久化保存
IWindowedBolt batchWriteBolt = new BatchWriteBolt()
.withTumblingWindow(new BaseWindowedBolt.Count(10));
builder.setBolt("batch_write", batchWriteBolt, 1)
.shuffleGrouping("app_channel", "sql")
.shuffleGrouping("app_stat", "sql")
.shuffleGrouping("channel_stat", "sql");
return builder.createTopology();
}
protected Config getConfig() {
Config config = new Config();
config.setDebug(true);
config.put("topology.message.timeout.secs", 1000);
return config;
}
protected KafkaSpoutConfig<String, String> getKafkaSpoutConfig(KafkaSpoutStreams kafkaSpoutStreams) {
return new KafkaSpoutConfig.Builder<>(getKafkaConsumerProps(), kafkaSpoutStreams, getTuplesBuilder(), getRetryService())
.setOffsetCommitPeriodMs(2000)
.setFirstPollOffsetStrategy(UNCOMMITTED_EARLIEST)
.setMaxUncommittedOffsets(50000)
.setPollTimeoutMs(2000)
.build();
}
protected KafkaSpoutRetryService getRetryService() {
return new KafkaSpoutRetryExponentialBackoff(TimeInterval.microSeconds(500),
TimeInterval.microSeconds(2), 35, TimeInterval.seconds(10));//change Integer.MAXVALUE to 3->50
}
protected Map<String, Object> getKafkaConsumerProps() {
Map<String, Object> props = new HashMap<>();
props.put(KafkaSpoutConfig.Consumer.BOOTSTRAP_SERVERS, "cstr-01:9092,cstr-02:9092,cstr-03:9092");
props.put(KafkaSpoutConfig.Consumer.GROUP_ID, "storm2");
props.put(KafkaSpoutConfig.Consumer.KEY_DESERIALIZER, "org.apache.kafka.common.serialization.StringDeserializer");
props.put(KafkaSpoutConfig.Consumer.VALUE_DESERIALIZER, "org.apache.kafka.common.serialization.StringDeserializer");
//props.put(KafkaSpoutConfig.Consumer.ENABLE_AUTO_COMMIT, "true");
//props.put(KafkaSpoutConfig.Consumer.AUTO_COMMIT_INTERVAL_MS, "5000");
// add resolve commit failure param
//props.put("session.timeout.ms", "50000"); //increase
//props.put("max.poll.records", "50000"); //reduce
return props;
}
protected KafkaSpoutTuplesBuilder<String, String> getTuplesBuilder() {
return new KafkaSpoutTuplesBuilderNamedTopics.Builder<String, String>(
new OnlineTupleBuilder<>(TOPICS[0]))
.build();
}
protected KafkaSpoutStreams getKafkaSpoutStreams() {
final Fields outputFields = new Fields("topic", "partition", "offset", "value");
return new KafkaSpoutStreamsNamedTopics.Builder(outputFields, STREAMS[0], new String[]{TOPICS[0]})
.build();
}
} }
When I changed KafkaSpout.java to print the offsets of the consumerRecords, I found that some offsets were skipped. 当我更改KafkaSpout.java以打印ConsumerRecords的偏移量时,我发现跳过了一些偏移量。 Image of the skipped offsets: http://7xtjbx.com1.z0.glb.clouddn.com/stack.png
在跳过的偏移图像http://7xtjbx.com1.z0.glb.clouddn.com/stack.png
What should I do about this problem? Does storm-kafka-client with the new consumer have a bug? 我该怎么办?storm-kafka-client使用新用户是否有问题? Thanks!
谢谢!
I resolved this problem by enabling auto commit. 使用自动提交时,可以解决此问题。
props.put(KafkaSpoutConfig.Consumer.ENABLE_AUTO_COMMIT, "true");
props.put(KafkaSpoutConfig.Consumer.AUTO_COMMIT_INTERVAL_MS, "5000");
You may then encounter a NullPointerException, which can be handled with a try/catch. You also need to remove the numUncommittedOffsets++
statement around line 297 of KafkaSpout.java. 您可能会发现一个NullPointerException并尝试try来解决它是确定的。并且您需要在KafkaSpout.java 297中删除
numUncommittedOffsets++
。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.