繁体   English   中英

从 BigQuery 的 JSON 类型列中 SELECT 值

[英]SELECT values from JSON type column in BigQuery

我有以下 SQL 将数据插入 BigQuery 中的 JSON 类型列

-- Recreate the demo table from scratch. IF EXISTS keeps the script runnable on
-- a first run, when there is no table to drop yet.
DROP TABLE IF EXISTS MMP.tmpJourneys;

CREATE TABLE MMP.tmpJourneys (
  id INT64 NOT NULL,  -- row identifier
  data JSON           -- journey document stored as native JSON
);

-- Seed three sample journeys (ids 1-3). Each data value is a JSON document
-- with journey-level fields (journey_id, transport_type, origin, destination,
-- origin_time, destination_time, duration_mins) plus an intermediate_stops
-- array of {station, arrival_time, departure_time} objects.
INSERT INTO MMP.tmpJourneys (id, data)
VALUES 
(1, JSON '{"journey_id":1,"transport_type":"train","origin":"London","destination":"Manchester","origin_time":"09:00","destination_time":"12:00","duration_mins":180,"intermediate_stops":[{"station":"Birmingham","arrival_time":"10:00","departure_time":"10:15"},{"station":"Crewe","arrival_time":"12:30","departure_time":"12:45"}]}'),
(2, JSON '{"journey_id":2,"transport_type":"bus","origin":"Manchester","destination":"Liverpool","origin_time":"10:00","destination_time":"12:00","duration_mins":120,"intermediate_stops":[{"station":"Warrington","arrival_time":"11:30","departure_time":"11:45"},{"station":"StHelens","arrival_time":"13:00","departure_time":"13:15"}]}'),
(3, JSON '{"journey_id":3,"transport_type":"scooter","origin":"Liverpool","destination":"Birmingham","origin_time":"13:00","destination_time":"14:30","duration_mins":90,"intermediate_stops":[{"station":"Warrington","arrival_time":"14:30","departure_time":"14:45"}]}');

-- Sanity check of the seeded rows. Columns are listed explicitly and the
-- output is ordered by id so the result is stable from run to run.
SELECT id, data
  FROM MMP.tmpJourneys
 ORDER BY id;

这些数据表示公共交通旅程,每条旅程都包含一组嵌套的中间停靠站。

如何查询 intermediate_stops 重复组中的数据?

-- Flatten journeys to one output row per (journey, intermediate stop):
-- journey-level fields are repeated on every row and the station name is read
-- from the unnested stop element. JSON_VALUE returns each scalar as a STRING.
-- Every extracted field carries its own alias, because BigQuery rejects
-- duplicate column names in a query result.
SELECT JSON_VALUE(data, '$.journey_id') AS journey_id,
       JSON_VALUE(data, '$.transport_type') AS transport_type,
       JSON_VALUE(data, '$.origin') AS origin,
       JSON_VALUE(data, '$.destination') AS destination,
       JSON_VALUE(data, '$.origin_time') AS origin_time,
       JSON_VALUE(data, '$.duration_mins') AS duration_mins,
       -- Full stops array, kept as JSON alongside the flattened station.
       JSON_QUERY(data, '$.intermediate_stops') AS intermediate_stops,
       JSON_VALUE(stops, '$.station') AS station1
  FROM MMP.tmpJourneys
       -- Correlated unnest: journeys whose stops array is empty or missing
       -- produce no rows.
 CROSS JOIN UNNEST(JSON_QUERY_ARRAY(data, '$.intermediate_stops')) AS stops;

看起来你已经很接近答案了。请参考以下查询。

-- Inline copy of the sample rows so the query can be run without creating
-- the table first.
WITH tmpJourneys AS (
  SELECT
    1 AS id,
    JSON '{"journey_id":1,"transport_type":"train","origin":"London","destination":"Manchester","origin_time":"09:00","destination_time":"12:00","duration_mins":180,"intermediate_stops":[{"station":"Birmingham","arrival_time":"10:00","departure_time":"10:15"},{"station":"Crewe","arrival_time":"12:30","departure_time":"12:45"}]}' AS data
  UNION ALL
  SELECT
    2,
    JSON '{"journey_id":2,"transport_type":"bus","origin":"Manchester","destination":"Liverpool","origin_time":"10:00","destination_time":"12:00","duration_mins":120,"intermediate_stops":[{"station":"Warrington","arrival_time":"11:30","departure_time":"11:45"},{"station":"StHelens","arrival_time":"13:00","departure_time":"13:15"}]}'
  UNION ALL
  SELECT
    3,
    JSON '{"journey_id":3,"transport_type":"scooter","origin":"Liverpool","destination":"Birmingham","origin_time":"13:00","destination_time":"14:30","duration_mins":90,"intermediate_stops":[{"station":"Warrington","arrival_time":"14:30","departure_time":"14:45"}]}'
)
-- One output row per journey; its stops are returned as an array of
-- (station, arrival_time, departure_time) structs built from the JSON array.
SELECT
  JSON_VALUE(data, '$.journey_id') AS journey_id,
  JSON_VALUE(data, '$.transport_type') AS transport_type,
  -- Remaining journey-level fields are extracted the same way (omitted here).
  ARRAY(
    SELECT AS STRUCT
      JSON_VALUE(stop_json, '$.station') AS station,
      JSON_VALUE(stop_json, '$.arrival_time') AS arrival_time,
      JSON_VALUE(stop_json, '$.departure_time') AS departure_time
    FROM UNNEST(JSON_QUERY_ARRAY(data, '$.intermediate_stops')) AS stop_json
  ) AS intermediate_stops
FROM tmpJourneys AS t;

查询结果

在此处输入图像描述

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM