[英]How to import nested json into google big query
I'm inserting JSON into Google Big Query.我将 JSON 插入到 Google Big Query 中。 At the bottom of the question is the schema of the JSON.问题的底部是 JSON 的架构。
Below is an example JSON:下面是一个示例 JSON:
{
"_index":"data",
"_type":"collection_v1",
"_id":"548d035f23r8987b768a5e60",
"_score":1,
"_source":{
"fullName":"Mike Smith",
"networks":[
{
"id":[
"12923449"
],
"network":"facebook",
"link":"https://www.facebook.com/127654449"
}
],
"sex":{
"network":"facebook",
"value":"male"
},
"interests":{
},
"score":1.045,
"merged_by":"548f899444v5t4v45te9a4cc"
}
}
as you can see there's a "_source.fullName" field with "Mike Smith".如您所见,有一个带有“Mike Smith”的“_source.fullName”字段。
When I try to create a table with it, it errors out:当我尝试用它创建一个表时,它会出错:
Array specified for non-repeated field: _source.fullName.为非重复字段指定的数组:_source.fullName。
I believe this field is a one-time field for _source.我相信这个字段是 _source 的一次性字段。 How do I overcome this error?我该如何克服这个错误?
here's the Schema:这是架构:
[
{
"name": "_index",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "_id",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "_type",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "score",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "header",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "fullName",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "src",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "avatar",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "merged_by",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "cover",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "sex",
"type": "RECORD",
"mode": "NULLABLE",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "_source",
"type": "RECORD",
"mode": "NULLABLE",
"fields": [
{
"name": "fullName",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "links",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "birthday",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "phones",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "pictures",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "url",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tab",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "contacts",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "id",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "fullName",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "groups",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "id",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "Name",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "skills",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "relations",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "about",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "emails",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "languages",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "places",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "type",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "education",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "school",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "experience",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "start",
"type": "NUMERIC",
"mode": "NULLABLE"
},
{
"name": "company",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "title",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "networks",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "link",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "id",
"type": "STRING",
"mode": "REPEATED"
}
]
},
{
"name": "network",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "others",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "books",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "music",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "games",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "spotify",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "network",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "tag",
"type": "STRING",
"mode": "NULLABLE"
}
]
}
]
}
]
}
]
You could import the full json row as if it was a CSV - basically a one column BigQuery table of json objects.您可以导入完整的 json 行,就好像它是 CSV - 基本上是 json 对象的一列 BigQuery 表。 Then you can parse the JSON at will inside BigQuery, with queries like this:然后,您可以在 BigQuery 中随意解析 JSON,查询如下:
WITH j AS (
SELECT """{"_index":"data","_type":"collection_v1","_id":"548d035f23r8987b768a5e60","_score":1,"_source":{"fullName":"Mike Smith","networks":[{"id":["12923449"],"network":"facebook","link":"https://www.facebook.com/127654449"}],"sex":{"network":"facebook","value":"male"},"interests":{},"score":1.045,"merged_by":"548f899444v5t4v45te9a4cc"}}""" j
)
SELECT index
, STRUCT(
JSON_EXTRACT_SCALAR(source, '$.fullName') AS fullName
, [
STRUCT(
JSON_EXTRACT_SCALAR(source, '$.networks[0].id[0]') AS id
, JSON_EXTRACT_SCALAR(source, '$.networks[0].network') AS network
, JSON_EXTRACT_SCALAR(source, '$.networks[0].link') AS link)
] AS networks
) source
FROM (
SELECT JSON_EXTRACT_SCALAR(j.j, '$._index') index
, JSON_EXTRACT(j.j, '$._source') source
FROM j
)
See:看:
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.