[英]How to convert fold() to an AggregateFunction in Flink for WindowFunction?
我試圖通過刪除不推薦使用的類將Flink的舊Yahoo流基准測試版本轉換為新版本。
我現在卡在將已棄用的 fold() 轉換為 aggregate() 這一步。我不知道如何將 fold 的現有參數對應到 aggregate 的參數。
// Legacy variant built on fold(): every event of a window is folded into a single
// WindowedCount, and the window function then emits one record per window.
val windowedCounts = windowedEvents.fold(
  new WindowedCount(null, "", 0, new java.sql.Timestamp(0L)),
  (acc: WindowedCount, event: (String, String, Timestamp)) => {
    // Move lastUpdate forward only when the event is strictly newer; ties keep the old value.
    if (acc.lastUpdate.getTime < event._3.getTime) acc.lastUpdate = event._3
    acc.count += 1
    acc
  },
  (key: Tuple, window: TimeWindow, input: Iterable[WindowedCount], out: Collector[WindowedCount]) => {
    // Only the first folded value is read from the iterable.
    val folded = input.iterator.next()
    println(folded.lastUpdate)
    val windowStart = new java.sql.Timestamp(window.getStart)
    out.collect(new WindowedCount(windowStart, key.getField(0), folded.count, folded.lastUpdate))
  }
)
// Intended replacement for the fold() call: hand the whole aggregation to CountAggregate.
val windowedCounts =
  windowedEvents.aggregate(new CountAggregate())
我想通過擴展AggregateFunction類來創建CountAggregate類,類似這樣:
// Draft aggregate function from the question; this is the attempt the asker wants help rewriting.
// Input: (String, String, Timestamp) tuples, of which only the third element is read.
// Accumulator: WindowedCount. Declared result type: Collector[WindowedCount].
class CountAggregate extends AggregateFunction[(String, String, Timestamp), WindowedCount, Collector[WindowedCount]] {
// Initial accumulator: null, empty string, 0, and a Timestamp built from 0L.
override def createAccumulator() = WindowedCount(null, "", 0, new java.sql.Timestamp(0L))
// Mutates the accumulator in place: increments count and keeps the later of the
// accumulator's lastUpdate and the event's timestamp (ties keep the accumulator's value).
override def accumulate(acc: WindowedCount, r: (String, String, Timestamp)): WindowedCount = {
val lastUpdate = if (acc.lastUpdate.getTime < r._3.getTime) r._3 else acc.lastUpdate
acc.count += 1
acc.lastUpdate = lastUpdate
acc
}
// Returns a four-argument function literal (key, window, input, out) instead of a value
// derived from `acc`; the `acc` parameter is never used in the body.
override def getValue (acc: WindowedCount) = { (key: Tuple, window: TimeWindow, input: Iterable[WindowedCount], out: Collector[WindowedCount]) =>
val windowedCount = input.iterator.next()
println(windowedCount.lastUpdate)
out.collect(new WindowedCount(new java.sql.Timestamp(window.getStart), key.getField(0), windowedCount.count, windowedCount.lastUpdate))
}
// NOTE(review): the snippet ends here without a closing brace for the class opened above.
重寫CountAggregate類的任何幫助將不勝感激。
您需要指定一個 AggregateFunction,以及一個 ProcessWindowFunction 來執行最後的 getValue 步驟:
// Two-argument aggregate: CountAggregate builds the per-window value and
// WindowAggregateFunction turns it into the emitted record.
val windowedCounts =
  windowedEvents.aggregate(new CountAggregate, new WindowAggregateFunction)
/**
 * Aggregate function that counts events and tracks the most recent timestamp.
 *
 * Input elements are (String, String, Timestamp) tuples; only the third element is read.
 * Accumulator and result are both WindowedCount. The first two constructor arguments
 * are always the placeholders null and "" in the values produced here.
 */
class CountAggregate extends AggregateFunction[(String, String, Timestamp), WindowedCount, WindowedCount] {

  /** Starting accumulator: placeholders, 0, and a Timestamp built from 0L. */
  override def createAccumulator() = WindowedCount(null, "", 0, new java.sql.Timestamp(0L))

  /**
   * Builds a new accumulator with the count incremented by one and the later of the
   * two timestamps; when they are equal the accumulator's timestamp is kept.
   */
  override def add(value: (String, String, Timestamp), acc: WindowedCount): WindowedCount = {
    val eventTime = value._3
    val newest = if (eventTime.getTime > acc.lastUpdate.getTime) eventTime else acc.lastUpdate
    WindowedCount(null, "", acc.count + 1, newest)
  }

  /** The accumulator is returned unchanged as the result. */
  override def getResult(accumulator: WindowedCount): WindowedCount = accumulator

  /** Combines two accumulators: counts are summed and the later timestamp wins (ties keep `a`). */
  override def merge(a: WindowedCount, b: WindowedCount): WindowedCount = {
    val newest = if (b.lastUpdate.getTime > a.lastUpdate.getTime) b.lastUpdate else a.lastUpdate
    WindowedCount(null, "", a.count + b.count, newest)
  }
}
/**
 * Window function that emits one WindowedCount per invocation, built from the
 * window start time, field 0 of the key, and the count / lastUpdate of the first
 * element it receives.
 */
class WindowAggregateFunction extends ProcessWindowFunction[WindowedCount, WindowedCount, Tuple, TimeWindow] {
  override def process(key: Tuple, context: Context, elements: Iterable[WindowedCount], out: Collector[WindowedCount]): Unit = {
    // Only the first element is consumed; next() throws on an empty iterable.
    val aggregated = elements.iterator.next()
    val windowStart = new java.sql.Timestamp(context.window.getStart)
    out.collect(WindowedCount(windowStart, key.getField(0), aggregated.count, aggregated.lastUpdate))
  }
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.