I am trying to extract all values after ":" from a nested field in Google BigQuery and add them to a separate column. However, the gets returned empty.
I think the problem is with the regex expression used, r'^:(.*?)'
-- Builds two arrays per row from the comma-separated GCAM string:
-- GCAM_field (text before ':') and GCAM_value (intended: text after ':').
SELECT
ARRAY(
-- Lazy capture anchored at the start of the entry, ending at the first ':'.
SELECT regexp_extract(x, r'^(.*?)\:')
FROM UNNEST(split(GCAM,',')) AS x
WHERE regexp_extract(x, r'^(.*?)\:') IS NOT NULL
) AS GCAM_field,
ARRAY(
-- NOTE(review): '^\:' requires the entry itself to begin with ':', so an entry
-- of the form "key:value" does not match and regexp_extract returns NULL; the
-- WHERE clause below then discards it. Even when the entry does start with ':',
-- the lazy group '(.*?)' has nothing after it to force expansion, so it
-- captures the empty string.
SELECT regexp_extract(x, r'^\:(.*?)')
FROM UNNEST(split(GCAM,',')) AS x
WHERE regexp_extract(x, r'^\:(.*?)') IS NOT NULL
) AS GCAM_value
FROM `gdelt-bq.gdeltv2.gkg_partitioned`
-- Both bounds are the same timestamp, so only rows whose _PARTITIONTIME equals it qualify.
WHERE _PARTITIONTIME BETWEEN TIMESTAMP('2019-02-02') AND TIMESTAMP('2019-02-02')
The expected result is a column "GCAM_field" with all values before ":" and a column "GCAM_value" with all values after ":". However the latter gets returned empty.
#standardSQL
-- Splits every comma-separated GCAM entry at its first ':' into two parallel
-- arrays: GCAM_field (text before the ':') and GCAM_value (text after it).
-- Entries that contain no ':' make REGEXP_EXTRACT return NULL and are left
-- out of both arrays, so the arrays always have the same length.
SELECT
  ARRAY(
    -- '^(.*?):' = shortest prefix of the entry that is followed by ':'.
    SELECT REGEXP_EXTRACT(x, r'^(.*?):')
    FROM UNNEST(SPLIT(GCAM, ',')) AS x WITH OFFSET AS pos
    WHERE REGEXP_EXTRACT(x, r'^(.*?):') IS NOT NULL
    -- UNNEST does not guarantee row order; sorting by the original offset
    -- keeps GCAM_field[i] and GCAM_value[i] referring to the same entry.
    ORDER BY pos
  ) AS GCAM_field,
  ARRAY(
    -- ':(.*?)$' = everything from the first ':' to the end of the entry.
    -- The '$' anchor is what forces the lazy group to expand.
    SELECT REGEXP_EXTRACT(x, r':(.*?)$')
    FROM UNNEST(SPLIT(GCAM, ',')) AS x WITH OFFSET AS pos
    WHERE REGEXP_EXTRACT(x, r':(.*?)$') IS NOT NULL
    ORDER BY pos
  ) AS GCAM_value
FROM `gdelt-bq.gdeltv2.gkg_partitioned`
-- Both bounds are the same timestamp, so only rows whose _PARTITIONTIME equals it qualify.
WHERE _PARTITIONTIME BETWEEN TIMESTAMP('2019-02-02') AND TIMESTAMP('2019-02-02')
or simpler:
#standardSQL
-- Same result shape as the regex version, using SPLIT instead:
-- GCAM_field = part before the first ':', GCAM_value = part after it.
-- Only entries that actually contain ':' are kept, and the same filter is
-- applied to both arrays. Filtering on "IS NOT NULL" alone would keep an entry
-- such as 'foo' in GCAM_field (offset 0 always exists) while dropping it from
-- GCAM_value (offset 1 is NULL), leaving the two arrays misaligned.
SELECT
  ARRAY(
    SELECT SPLIT(x, ':')[SAFE_OFFSET(0)]
    FROM UNNEST(SPLIT(GCAM, ',')) AS x WITH OFFSET AS pos
    WHERE STRPOS(x, ':') > 0
    -- UNNEST does not guarantee row order; sort by the original offset so
    -- GCAM_field[i] and GCAM_value[i] refer to the same entry.
    ORDER BY pos
  ) AS GCAM_field,
  ARRAY(
    -- NOTE: for an entry with several ':' (e.g. 'a:1:2') this returns only the
    -- segment between the first and second ':' ('1'), not the whole remainder.
    SELECT SPLIT(x, ':')[SAFE_OFFSET(1)]
    FROM UNNEST(SPLIT(GCAM, ',')) AS x WITH OFFSET AS pos
    WHERE STRPOS(x, ':') > 0
    ORDER BY pos
  ) AS GCAM_value
FROM `gdelt-bq.gdeltv2.gkg_partitioned`
-- Both bounds are the same timestamp, so only rows whose _PARTITIONTIME equals it qualify.
WHERE _PARTITIONTIME BETWEEN TIMESTAMP('2019-02-02') AND TIMESTAMP('2019-02-02')
one more way to solve this problem
-- Extracts all keys and all values from a "key:value,key:value,..." string in
-- one pass each, using regexp_extract_all on inline sample data.
with
  sample_data as (
    select
      id,
      gcam
    from
      unnest(
        array[
          struct(1 as id, 'bbb:111,aaa:222' as gcam),
          struct(2 as id, 'qqq:,k:3,:777,xxx:555:&&&' as gcam)
        ]
      )
  )
select
  -- The string is wrapped in commas so that every entry, including the first
  -- and the last, is delimited by ',' on both sides.
  -- Key: text between a ',' and the next ':'. The class [^,:] cannot cross a
  -- comma, so an entry without ':' is skipped rather than being glued onto the
  -- following key (as ',(.*?):' would do for 'foo,bar:1').
  regexp_extract_all(
    concat(',', gcam, ','), r',([^,:]*):') as gcam_field,
  -- Value: text between the first ':' of an entry and the ',' that ends it;
  -- further ':' characters inside the value are kept ('xxx:555:&&&' -> '555:&&&').
  regexp_extract_all(
    concat(',', gcam, ','), r':([^,]*),') as gcam_value
from
  sample_data
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.