Our goal is to achieve the following architecture. Most importantly, we need to be able to read all the data of topic T1 (from all partitions).
The problem we are facing is that we are not able to join two nodes that are created from different builders (there are two different builders in every instance). In every instance we create two builders (B1, B2). B1 creates a source processor that reads data from all partitions of topic T1, which is why every instance has a unique ID. B2 reads data from one partition of T2. Later, when we do the join, we get this error: "Invalid topology: StateStore aggregated-stream-store is not added yet", because B1 and B2 have different APP_IDs.
This is our code:
class StrmApp
/**
 * Stream process that reads the configured input topic through the builder inherited from
 * {@link StrmProc} and reads topic "T1" through a second, private builder that is started
 * under its own randomly generated application id.
 *
 * <p>NOTE(review): {@link #run()} joins a stream created by the inherited builder with a
 * table created by the private builder. The two belong to different topologies, which is
 * what produces "Invalid topology: StateStore aggregated-stream-store is not added yet".
 * Both sides of a join have to be created from the same {@code StreamsBuilder}; resolving
 * this needs a topology redesign and is intentionally not attempted here.
 */
public class StrmApp extends StrmProc {

    /** Private builder used only for the "T1" topology. */
    private StreamsBuilder myBuilder;

    /** Validator obtained from the data service for (String, Data) records. */
    private Validator<String, Data> dataValidator;

    /** Copy of the base properties with a per-instance application id. */
    private Properties ownBuilderProps;

    /** Streams instance started from the private builder. */
    private KafkaStreams ownStreams;

    /**
     * Creates the process, its private builder and a private copy of the stream properties
     * whose application id is unique per instance.
     *
     * @param dataService service providing the record validator
     * @param config      process configuration
     * @param listener    process listener, forwarded to the base class
     */
    public StrmApp(ValidDataService dataService, ProcessConfig config, ProcessListener listener) {
        super(dataService, config, listener);
        myBuilder = new StreamsBuilder();
        dataValidator = getValidDataService().getValidator(String.class, Data.class);
        ownBuilderProps = new Properties();
        ownBuilderProps.putAll(getProperties());
        // Unique ID for each instance (different consumer group).
        // application.id is a string-typed config, so the UUID must be stored in its string
        // form; a raw UUID object is rejected when the streams configuration is parsed.
        ownBuilderProps.put(StreamsConfig.APPLICATION_ID_CONFIG, UUID.randomUUID().toString());
    }

    /**
     * Builds, on the private builder, a table that aggregates the records of topic "T1"
     * per key into a {@code TheDataList}, materialized as "aggregated-stream-store".
     *
     * <p>For each record: if {@code getValideData()} is false, every list entry whose
     * timestamp is less than or equal to the record's timestamp is removed; otherwise the
     * record is appended to the list.
     *
     * @return the aggregated table
     */
    private KTable<String, TheDataList> globalStream() {
        // KStream of records from the T1 topic using String and TheDataSerde deserializers
        KStream<String, Data> trashStream =
                getOwnBuilder().stream("T1", Consumed.with(Serdes.String(), SerDes.TheDataSerde));
        // Intermediate grouped representation required before aggregating
        KGroupedStream<String, Data> kGroupedStream = trashStream.groupByKey();
        // Describe how the state store backing the KTable is materialized; the store name
        // is "aggregated-stream-store"
        Materialized<String, TheDataList, KeyValueStore<Bytes, byte[]>> materialized =
                Materialized.as("aggregated-stream-store");
        materialized = materialized.withValueSerde(SerDes.TheDataListSerde);
        return kGroupedStream.aggregate(() -> new TheDataList(), (key, value, aggregate) -> {
            if (!value.getValideData()) {
                aggregate.getList().removeIf((t) -> t.getTimestamp() <= value.getTimestamp());
            } else {
                aggregate.getList().add(value);
            }
            return aggregate;
        }, materialized);
    }

    /**
     * Creates a {@code Data} record for the given id that is flagged as not valid and
     * stamped with the current wall-clock time.
     *
     * @param vid id to set on the record
     * @return the new record
     */
    private Data tombstone(String vid) {
        Data d = new Data();
        d.setVid(vid);
        d.setValideData(false);
        d.setTimestamp(System.currentTimeMillis());
        return d;
    }

    /**
     * Wires the input stream, the "T1" table and their join, then starts both streams
     * instances (the private one first, then the inherited one).
     */
    @Override
    public void run() {
        /* read from topic 2 (T2) - we want to only read one partition */
        KStream<String, Data> inStream = getBuilder()
                .stream(getProcessConfig().getTopicConfig().getTopicIn(),
                        Consumed.with(Serdes.String(), SerDes.TheDataSerde))
                .filter(getValidDataService().getValidator(String.class, Data.class));
        /* Read all partitions from topic 1 (T1) - we want to read from all partitions (P1, P2 and P3) */
        KTable<String, TheDataList> ft = globalStream();
        // NOTE(review): the join below raises
        // "Invalid topology: StateStore aggregated-stream-store is not added yet."
        // inStream was created by getBuilder() while ft was created by getOwnBuilder(), so
        // the store backing ft is not part of the topology the join is added to.
        logger.warn("##JOIN:");
        /* join between data coming from T1 and data coming from T2 */
        KStream<String, TheDataList> validated = inStream.join(ft,
                new ValueJoiner<Data, TheDataList, TheDataList>() {
                    @Override
                    public TheDataList apply(Data valid, TheDataList ivalids) {
                        // Validate every aggregated entry against the incoming record
                        ivalids.getList().forEach((c) -> {
                            dataValidator.validate(c, valid);
                        });
                        return ivalids;
                    }
                });
        // ...... some code
        ownStreams = StreamTools.startKStreams(getOwnBuilder(), getOwnBuilderProps(), this, this);
        super.startStreams();
    }

    /** @return the properties used to start the private "T1" topology */
    private Properties getOwnBuilderProps() {
        return ownBuilderProps;
    }

    /** @return the private builder used for the "T1" topology */
    private StreamsBuilder getOwnBuilder() {
        // return getBuilder();
        return myBuilder;
    }
    // .......
}
class StrmProc
/**
 * Base class for processes that own a single {@code StreamsBuilder} and start a
 * streams instance from it using the properties provided by {@link AProcess}.
 */
public abstract class StrmProc extends AProcess {

    /** Builder created once per process instance and exposed to subclasses. */
    private final StreamsBuilder builder = new StreamsBuilder();

    /**
     * Forwards all arguments to the {@link AProcess} constructor.
     *
     * @param dataService service handed to the base class
     * @param config      process configuration handed to the base class
     * @param listener    process listener handed to the base class
     */
    public StrmProc(ValidDataService dataService, ProcessConfig config, ProcessListener listener) {
        super(dataService, config, listener);
    }

    /**
     * @return the builder owned by this process
     */
    protected final StreamsBuilder getBuilder() {
        return this.builder;
    }

    /**
     * Starts a streams instance from this process's builder and base properties, passing
     * this object for both remaining arguments of {@code StreamTools.startKStreams}.
     *
     * @return whatever {@code StreamTools.startKStreams} returns
     */
    protected final KafkaStreams startStreams() {
        final StreamsBuilder topologyBuilder = getBuilder();
        return StreamTools.startKStreams(topologyBuilder, getProperties(), this, this);
    }
    // ........
}
class AProcess
/**
 * Base class for processes: stores the collaborators passed at construction time and
 * derives the Kafka Streams properties from the process configuration.
 */
public abstract class AProcess implements Process {

    /** Kafka Streams properties derived from the configuration at construction time. */
    private final Properties propertie;

    /** Process configuration supplied by the caller. */
    private final ProcessConfig config;

    /** Data service supplied by the caller. */
    private final ValidDataService dataService;

    /** Process listener supplied by the caller. */
    private final ProcessListener listener;

    /**
     * Stores the collaborators and builds the stream properties from {@code config}.
     *
     * @param dataService data service exposed through {@link #getValidDataService()}
     * @param config      configuration exposed through {@link #getProcessConfig()} and
     *                    used to build the properties
     * @param listener    process listener
     */
    protected AProcess(ValidDataService dataService, ProcessConfig config, ProcessListener listener) {
        super();
        this.dataService = dataService;
        this.propertie = getProperties(config);
        this.config = config;
        this.listener = listener;
    }

    /**
     * Builds the Kafka Streams properties: application id from {@code config.getApp()},
     * bootstrap servers from {@code config.getBootstrapServerUrl()}, and the String serde
     * class as default key serde.
     *
     * @param config configuration to read the values from
     * @return a new properties object holding those three entries
     */
    private Properties getProperties(ProcessConfig config) {
        // A single allocation is enough; the original created and discarded a second one.
        Properties kafkaProperties = new Properties();
        kafkaProperties.put(StreamsConfig.APPLICATION_ID_CONFIG, config.getApp());
        kafkaProperties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, config.getBootstrapServerUrl());
        kafkaProperties.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        return kafkaProperties;
    }

    /**
     * @return the properties built at construction time (the same instance on every call)
     */
    protected Properties getProperties() {
        return propertie;
    }

    /**
     * @return the process configuration passed to the constructor
     */
    protected ProcessConfig getProcessConfig() {
        return config;
    }

    /**
     * @return the data service passed to the constructor
     */
    protected ValidDataService getValidDataService() {
        return dataService;
    }
    // .......
}
How can we achieve this with Kafka Streams?
In order to use a join in Kafka Streams, you need to use a single StreamsBuilder instance and not two (in your case there are two of them: the variables inStream and ft are created from different builders).
Usually Kafka Streams throws the exception "TopologyException: Invalid topology: StateStore is not added yet" if the KeyValueStore has not been added to the StreamsBuilder instance, e.g. via streamsBuilder.addStateStore(storeBuilder).
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.