[英]How to check missing values in Clickhouse
我有一個表格,每 15 分鍾就會寫入一次數據。 我需要檢查在整個期間內是否每一天都有數據。 表中有一個時間列 time,其數據格式為 yyyy-mm-dd hh:mm:ss
我找到了開始日期和最后日期
我發現你可以從這個間隔(開始日期和結束日期)生成一個日期數組,每一行都將與之進行比較,如果沒有匹配,這里就是缺失的日期。
我試過這個:
-- Asker's original attempt, reproduced unchanged. It is the query that raises
-- the NOT_FOUND_COLUMN_IN_BLOCK error quoted right after this snippet.
-- dates_range: one row holding the first and last calendar day found in `table`.
WITH dates_range AS (SELECT toDate(min(time)) AS start_date,
toDate(max(time)) AS end_date
FROM table)
SELECT dates
FROM (
-- `dates` is a single array value built by arrayMap; there is no arrayJoin here,
-- so the subquery yields one row with an array instead of one row per day.
-- range(0, end - start) stops before the last offset, so end_date itself is
-- not generated (ClickHouse range() has an exclusive upper bound).
SELECT arrayFlatten(arrayMap(x -> start_date + x, range(0, toUInt64(end_date - start_date)))) AS dates
FROM dates_range
)
LEFT JOIN (
-- Distinct calendar days that actually have rows.
SELECT toDate(time) AS date
FROM table
GROUP BY toDate(time)
-- The join key is `date`, but the left side only exposes `dates`; this
-- mismatch is what the reported error message complains about.
) USING date
-- NOTE(review): unmatched right-side columns are only NULL when join_use_nulls = 1;
-- presumably this filter would not work with the default setting — confirm.
WHERE date IS NULL;
但它返回:Code: 10. DB::Exception: Not found column date in block. There are only columns: dates. (NOT_FOUND_COLUMN_IN_BLOCK)。 我不知道該如何解決這個問題。
您還可以使用 ORDER BY 的 WITH FILL 修飾符:
https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/#order-by-expr-with-fill-modifier
-- Demo fixture: 550 timestamps starting at 2020-01-01. Each offset is
-- number * 1440 seconds, tripled for every 33rd row, which leaves some
-- calendar days without any rows.
create table T (time DateTime) engine = Memory
as SELECT toDateTime('2020-01-01') + number * 60 * 24 * if(number % 33 = 0, 3, 1)
FROM numbers(550);

-- daily_counts: rows per calendar day. WITH FILL inserts the days that are
-- absent from the data, and those inserted rows carry the default count 0.
WITH daily_counts AS
(
    SELECT
        toDate(time) AS t,
        count() AS c
    FROM T
    GROUP BY t
    ORDER BY t ASC WITH FILL
)
-- Only the filled-in days (count 0) are the missing ones.
SELECT
    t,
    c
FROM daily_counts
WHERE c = 0
┌──────────t─┬─c─┐
│ 2020-01-11 │ 0 │
│ 2020-01-13 │ 0 │
│ 2020-01-16 │ 0 │
│ 2020-01-18 │ 0 │
│ 2020-01-21 │ 0 │
│ 2020-01-23 │ 0 │
│ 2020-01-26 │ 0 │
└────────────┴───┘
-- Demo fixture: 550 timestamps starting at 2020-01-01. Each offset is
-- number * 1440 seconds, tripled for every 33rd row, which leaves some
-- calendar days without any rows.
create table T (time DateTime) engine = Memory
as SELECT toDateTime('2020-01-01') + number * 60 * 24 * if(number % 33 = 0, 3, 1)
FROM numbers(550);

-- Window-function variant: compare every day that has data with the previous
-- day that has data. The result lists the first day WITH data after each gap,
-- not the missing days themselves.
WITH
    present_days AS
    (
        SELECT toDate(time) AS b
        FROM T
        GROUP BY b
        ORDER BY b ASC
    ),
    day_gaps AS
    (
        SELECT
            b,
            -- A frame of exactly "1 PRECEDING" makes any(b) return the previous day.
            b - any(b) OVER (ORDER BY b ASC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS gap_days
        FROM present_days
    )
SELECT b
FROM day_gaps
-- gap_days > 1 means at least one day was skipped.
-- NOTE(review): the < 10000 bound presumably drops the first row, which has no
-- preceding row and therefore gets a huge difference — confirm.
WHERE gap_days > 1 AND gap_days < 10000
┌──────────b─┐
│ 2020-01-12 │
│ 2020-01-14 │
│ 2020-01-17 │
│ 2020-01-19 │
│ 2020-01-22 │
│ 2020-01-24 │
│ 2020-01-27 │
└────────────┘
-- Demo fixture: 550 timestamps starting at 2020-01-01. Each offset is
-- number * 1440 seconds, tripled for every 33rd row, which leaves some
-- calendar days without any rows.
create table T ( time DateTime) engine=Memory
as SELECT toDateTime('2020-01-01') + (((number * 60) * 24) * if((number % 33) = 0, 3, 1))
FROM numbers(550);

-- bounds is a scalar tuple: (first day with data, last day with data).
WITH (SELECT (toDate(min(time)), toDate(max(time))) FROM T) AS bounds
SELECT l.*, r.*
FROM
(
    -- Calendar: one row per day from bounds.1 through bounds.2 inclusive
    -- (the +1 keeps the last day; arrayJoin turns the array into rows).
    -- The lambda parameter has its own name (day_offset) so it cannot be
    -- confused with the tuple alias, unlike the earlier `x -> x.1 + x` form.
    SELECT arrayJoin(arrayMap(day_offset -> bounds.1 + day_offset, range(0, toUInt64(bounds.2 - bounds.1 + 1)))) AS date
) l
LEFT JOIN
(
    -- Distinct calendar days that actually have rows.
    SELECT toDate(time) AS date
    FROM T
    GROUP BY toDate(time)
) r USING date
-- With join_use_nulls = 1 the unmatched right side is NULL, so this keeps
-- exactly the calendar days that have no data.
WHERE r.date IS NULL
settings join_use_nulls = 1;
┌───────date─┬─r.date─┐
│ 2020-01-11 │   ᴺᵁᴸᴸ │
│ 2020-01-13 │   ᴺᵁᴸᴸ │
│ 2020-01-16 │   ᴺᵁᴸᴸ │
│ 2020-01-18 │   ᴺᵁᴸᴸ │
│ 2020-01-21 │   ᴺᵁᴸᴸ │
│ 2020-01-23 │   ᴺᵁᴸᴸ │
│ 2020-01-26 │   ᴺᵁᴸᴸ │
└────────────┴────────┘
-- Demo fixture: 550 timestamps starting at 2020-01-01. Each offset is
-- number * 1440 seconds, tripled for every 33rd row, which leaves some
-- calendar days without any rows.
create table T ( time DateTime) engine=Memory
as SELECT toDateTime('2020-01-01') + (((number * 60) * 24) * if((number % 33) = 0, 3, 1))
FROM numbers(550);

-- Join-free variant: stack the generated calendar (type = 2) on top of the
-- per-day row counts from T (type = 1), then aggregate per day.
-- bounds is a scalar tuple: (first day with data, last day with data).
WITH (SELECT (toDate(min(time)), toDate(max(time))) FROM T) AS bounds
SELECT
    date,
    sumIf(cnt, type = 1) AS c1,  -- rows found in T for this day
    sumIf(cnt, type = 2) AS c2   -- 1 when the day belongs to the generated calendar
FROM
(
    -- Calendar: one row per day from bounds.1 through bounds.2 inclusive.
    -- The lambda parameter has its own name (day_offset) so it cannot be
    -- confused with the tuple alias, unlike the earlier `x -> x.1 + x` form.
    SELECT
        arrayJoin(arrayMap(day_offset -> bounds.1 + day_offset, range(0, toUInt64(bounds.2 - bounds.1 + 1)))) AS date,
        2 AS type,
        1 AS cnt
    UNION ALL
    SELECT
        toDate(time) AS date,
        1 AS type,
        count() AS cnt
    FROM T
    GROUP BY toDate(time)
)
GROUP BY date
-- c1 = 0: a calendar day with no rows in T (a missing day).
-- c2 = 0: a day in T that is outside the generated calendar.
HAVING c1 = 0 OR c2 = 0
-- Explicit ordering so the result is deterministic and matches the listing below.
ORDER BY date;
┌───────date─┬─c1─┬─c2─┐
│ 2020-01-11 │ 0 │ 1 │
│ 2020-01-13 │ 0 │ 1 │
│ 2020-01-16 │ 0 │ 1 │
│ 2020-01-18 │ 0 │ 1 │
│ 2020-01-21 │ 0 │ 1 │
│ 2020-01-23 │ 0 │ 1 │
│ 2020-01-26 │ 0 │ 1 │
└────────────┴────┴────┘
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.