[英]SELECT values from JSON type column in BigQuery
我使用以下 SQL 将数据插入 BigQuery 中的 JSON 类型列:
-- Rebuild the scratch table that stores one JSON document per journey.
-- IF EXISTS keeps the script re-runnable: a plain DROP TABLE fails on the
-- first run, when the table has not been created yet.
DROP TABLE IF EXISTS MMP.tmpJourneys;

CREATE TABLE MMP.tmpJourneys (
  id INT64 NOT NULL,
  -- One journey document; "intermediate_stops" is a nested array of stop objects.
  data JSON
);

-- Three sample journeys (train, bus, scooter) with one or two intermediate stops each.
INSERT INTO MMP.tmpJourneys (id, data)
VALUES
  (1, JSON '{"journey_id":1,"transport_type":"train","origin":"London","destination":"Manchester","origin_time":"09:00","destination_time":"12:00","duration_mins":180,"intermediate_stops":[{"station":"Birmingham","arrival_time":"10:00","departure_time":"10:15"},{"station":"Crewe","arrival_time":"12:30","departure_time":"12:45"}]}'),
  (2, JSON '{"journey_id":2,"transport_type":"bus","origin":"Manchester","destination":"Liverpool","origin_time":"10:00","destination_time":"12:00","duration_mins":120,"intermediate_stops":[{"station":"Warrington","arrival_time":"11:30","departure_time":"11:45"},{"station":"StHelens","arrival_time":"13:00","departure_time":"13:15"}]}'),
  (3, JSON '{"journey_id":3,"transport_type":"scooter","origin":"Liverpool","destination":"Birmingham","origin_time":"13:00","destination_time":"14:30","duration_mins":90,"intermediate_stops":[{"station":"Warrington","arrival_time":"14:30","departure_time":"14:45"}]}');

-- Verify the load. Columns are listed explicitly and rows are ordered by id
-- so the output is stable from run to run.
SELECT
  id,
  data
FROM MMP.tmpJourneys
ORDER BY id;
这些数据表示具有一组嵌套的中间位置的公共交通旅程。
如何查询 intermediate_stops 重复组中的数据?
-- Flatten every journey into one row per intermediate stop.
-- Each scalar field carries its own alias; reusing a single alias for several
-- fields yields duplicate output column names, which are ambiguous to
-- consumers and may be rejected by BigQuery.
-- JSON_VALUE returns STRING, so numeric fields such as duration_mins come
-- back as text; CAST them if a number is needed.
SELECT
  JSON_VALUE(data, '$.journey_id') AS journey_id,
  JSON_VALUE(data, '$.transport_type') AS transport_type,
  JSON_VALUE(data, '$.origin') AS origin,
  JSON_VALUE(data, '$.destination') AS destination,
  JSON_VALUE(data, '$.origin_time') AS origin_time,
  JSON_VALUE(data, '$.duration_mins') AS duration_mins,
  -- The complete stops array, kept as JSON next to the flattened stop.
  JSON_QUERY(data, '$.intermediate_stops') AS intermediate_stops,
  -- Station name of the stop produced by the UNNEST below.
  JSON_VALUE(stops, '$.station') AS station1
FROM MMP.tmpJourneys
-- Correlated join: each journey row is paired with the elements of its own
-- stops array. A journey whose array is empty or missing yields no rows;
-- switch to LEFT JOIN if such journeys must be kept.
CROSS JOIN UNNEST(JSON_QUERY_ARRAY(data, '$.intermediate_stops')) AS stops;
看起来你已经非常接近答案了。请参考以下查询。
-- Inline copy of the sample journeys so the query runs without a physical table.
WITH tmpJourneys AS (
  SELECT
    1 AS id,
    JSON '{"journey_id":1,"transport_type":"train","origin":"London","destination":"Manchester","origin_time":"09:00","destination_time":"12:00","duration_mins":180,"intermediate_stops":[{"station":"Birmingham","arrival_time":"10:00","departure_time":"10:15"},{"station":"Crewe","arrival_time":"12:30","departure_time":"12:45"}]}' AS data
  UNION ALL
  SELECT
    2,
    JSON '{"journey_id":2,"transport_type":"bus","origin":"Manchester","destination":"Liverpool","origin_time":"10:00","destination_time":"12:00","duration_mins":120,"intermediate_stops":[{"station":"Warrington","arrival_time":"11:30","departure_time":"11:45"},{"station":"StHelens","arrival_time":"13:00","departure_time":"13:15"}]}'
  UNION ALL
  SELECT
    3,
    JSON '{"journey_id":3,"transport_type":"scooter","origin":"Liverpool","destination":"Birmingham","origin_time":"13:00","destination_time":"14:30","duration_mins":90,"intermediate_stops":[{"station":"Warrington","arrival_time":"14:30","departure_time":"14:45"}]}'
)
-- One output row per journey; the stops come back as an array of structs
-- rather than as extra rows.
SELECT
  JSON_VALUE(data.journey_id) AS journey_id,
  JSON_VALUE(data.transport_type) AS transport_type,
  -- The remaining scalar fields can be extracted the same way.
  ARRAY(
    -- Turn each JSON stop object into a typed STRUCT of STRING fields.
    SELECT AS STRUCT
      JSON_VALUE(stop.station) AS station,
      JSON_VALUE(stop.arrival_time) AS arrival_time,
      JSON_VALUE(stop.departure_time) AS departure_time
    FROM UNNEST(JSON_QUERY_ARRAY(data.intermediate_stops)) AS stop
  ) AS intermediate_stops
FROM tmpJourneys AS journeys;
查询结果
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.