[英]Snowflake - flatten multiple nested array values from json variant column
我有一个 JSON 数据,我想从中提取键“文本”的值,并在单行中分隔。 非常感谢任何帮助实现所需的 output 的帮助。
样本 JSON 数据:
{
"expand": "schema,names",
"issues": [
{
"id": "123456",
"key": "XYZ-123",
"fields": {
"customfield_10000": "abcd",
"customfield_10001": 7,
"customfield_10002": null,
"description": {
"version": 1,
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 1"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 2"
},
{
"type": "text",
"text": "some text value 3",
"marks": [
{
"type": "link",
"attrs": {
"href": "some ref"
}
}
]
},
{
"type": "text",
"text": "some text value 4"
}
]
},
{
"type": "blockquote",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 5"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "inlineCard",
"attrs": {
"url": "some url"
}
},
{
"type": "text",
"text": "some text value 6"
}
]
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 7"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 8"
},
{
"type": "text",
"text": "some text value 9",
"marks": [
{
"type": "link",
"attrs": {
"href": "some link"
}
}
]
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 10"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 11"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 12"
}
]
}
]
}
}
}
]
}
所需的 output:
ISSUE_ID ISSUE_KEY CF_10000 CF_10001 CF_10002 DESCRIPTION
123456 XYZ-123 abcd 7 null some text value 1|some text value 2|some text value 3.....
我正在使用以下查询来获取 arrays 值。 但是,我希望将 arrays 中的键“文本”值填充为上述所需格式。
-- Returns one row per element of payload_json:issues (outer flatten, so a row
-- without issues still yields one output row). Scalar fields are cast to SQL
-- types; DESCRIPTION is the whole description.content array cast to text, i.e.
-- the nested arrays are not unpacked here.
select
    issue.value['id']::number                                   as issue_id,
    issue.value['key']::varchar                                 as issue_key,
    issue.value['fields']['customfield_10000']::varchar         as cf_10000,
    issue.value['fields']['customfield_10001']::number          as cf_10001,
    issue.value['fields']['customfield_10002']::varchar         as cf_10002,
    issue.value['fields']['description']['content']::varchar    as description
from
    variant_table,
    lateral flatten(input => payload_json['issues'], outer => true) as issue
我创建了一个 UDF,用于将 JSON 数组 object 键值提取到数组的字符串中,但这并不能帮助我从上面共享的 JSON 中获得所需的 output,因为它在对象内部嵌套了 arrays。
-- Extracts the value stored under key B from every element of array A and
-- returns those values as a new array.
--   a : array whose elements are expected to be JSON objects
--   b : name of the key to read from each element
-- Declared STRICT, so the function is not invoked with NULL arguments.
-- Only the top level of A is inspected; arrays nested inside the elements are
-- not traversed.
create or replace function UDF_ARRAY_OBJECT_TO_STRING_ARRAY(a array, b varchar)
returns array
language javascript
strict
comment = 'UDF to extract JSON array object key value into string of array. A refers to input array and B refers to extract which key from the array object'
as $$
    // The SQL arguments are referenced by their upper-case names (A, B) here.
    return A.map(function (element) {
        // A null/undefined element has no properties; indexing it would throw a
        // TypeError and fail the whole query. Yield undefined instead, which is
        // the same result an element without key B already produces.
        return element == null ? undefined : element[B];
    });
$$;
你那里的 arrays 比你在横向展平中处理的要多得多。 有了更多的展平和一个 listagg() function,你应该可以做到这一点。 请注意,您可能需要按索引而不是字段值进行分组,具体取决于您要访问的内容,但这会给出您在示例中寻找的结果:
-- Produces one row per issue and concatenates every "text" value found in the
-- issue's description into a single '|'-separated string, in document order.
--
-- description.content is a list of block nodes. A "paragraph" block holds its
-- text nodes directly in its own "content" array, while a "blockquote" block
-- holds paragraphs which in turn hold the text nodes. Text therefore sits either
-- two or three levels below description.content, so three nested flattens are
-- needed; nodes nested deeper than that are not reached by this query.
with sample_payload as (
    -- Inline sample document used in place of a real VARIANT column.
    select parse_json('{
"expand": "schema,names",
"issues": [
{
"id": "123456",
"key": "XYZ-123",
"fields": {
"customfield_10000": null,
"customfield_10001": null,
"customfield_10002": null,
"description": {
"version": 1,
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 1"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 2"
},
{
"type": "text",
"text": "some text value 3",
"marks": [
{
"type": "link",
"attrs": {
"href": "some ref"
}
}
]
},
{
"type": "text",
"text": "some text value 4"
}
]
},
{
"type": "blockquote",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 5"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "inlineCard",
"attrs": {
"url": "some url"
}
},
{
"type": "text",
"text": "some text value 6"
}
]
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 7"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 8"
},
{
"type": "text",
"text": "some text value 9",
"marks": [
{
"type": "link",
"attrs": {
"href": "some link"
}
}
]
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 10"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 11"
}
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "some text value 12"
}
]
}
]
}
}
}
]
}') as payload_json
)
select
    issue.value:id::number as issue_id,
    issue.value:key::varchar as issue_key,
    issue.value:fields.customfield_10000::varchar as cf_10000,
    issue.value:fields.customfield_10001::number as cf_10001,
    issue.value:fields.customfield_10002::varchar as cf_10002,
    -- A text node appears either at the child level (inside a paragraph) or at
    -- the leaf level (inside a paragraph inside a blockquote). Nodes without a
    -- "text" key yield NULL, which listagg skips.
    -- The explicit ordering by array position at every level keeps the pieces
    -- in document order; without it the concatenation order is not guaranteed.
    listagg(
        coalesce(leaf_node.value:text::varchar, child_node.value:text::varchar),
        '|'
    ) within group (
        order by top_node.index, child_node.index, leaf_node.index
    ) as description
from
    sample_payload,
    lateral flatten(input => sample_payload.payload_json:issues, outer => true) as issue,
    lateral flatten(input => issue.value:fields:description:content, outer => true) as top_node,
    lateral flatten(input => top_node.value:content, outer => true) as child_node,
    -- outer => true keeps child nodes that have no "content" array of their own
    -- (plain text nodes) as a single row with NULL leaf columns.
    lateral flatten(input => child_node.value:content, outer => true) as leaf_node
group by
    issue_id,
    issue_key,
    cf_10000,
    cf_10001,
    cf_10002;
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.