I'm extracting some data from BigQuery using the Python client library and I'm trying to transform the output into the following format:
{
"2020": [
{
'month': 'Nov',
'cat1': 11.0,
'cat2': 89.0,
'cat3': 3.0,
'cat4': 15.0,
'cat5': 7.0,
'cat6': 1.0,
'cat7': 63.0,
'other': 0.0
},
etc.....
],
"2021": [
{
'month': 'Nov',
'cat1': 11.0,
'cat2': 89.0,
'cat3': 3.0,
'cat4': 15.0,
'cat5': 7.0,
'cat6': 1.0,
'cat7': 63.0,
'other': 0.0}
]
}
Here's the data:
# Import the BigQuery client library and create a client object.
from google.cloud import bigquery
bq_client = bigquery.Client()
# The query text is elided in the post.
sql_query = "SELECT ..."
# Submit the query; iterating the returned job yields one Row per result row
# (see the printed Row(...) output that follows).
query_job = bq_client.query(sql_query)
# NOTE(review): indentation was lost in this paste, so it is unclear whether
# the bare print() belongs inside the loop or after it -- confirm.
for row in query_job:
print(row)
print()
Row((2020, 'Mar', 4.0, 96.0, 0.0, 5.0, 5.0, 0.0, 85.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'Apr', 52.0, 48.0, 0.0, 9.0, 4.0, 0.0, 35.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'May', 22.0, 78.0, 0.0, 14.0, 8.0, 0.0, 56.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'Jun', 46.0, 54.0, 0.0, 13.0, 7.0, 0.0, 34.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'Jul', 16.0, 84.0, 0.0, 20.0, 11.0, 2.0, 51.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'Aug', 13.0, 87.0, 0.0, 22.0, 5.0, 4.0, 55.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'Sep', 14.0, 86.0, 0.0, 19.0, 4.0, 4.0, 59.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'Oct', 5.0, 95.0, 0.0, 19.0, 5.0, 5.0, 66.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'Nov', 11.0, 89.0, 3.0, 15.0, 7.0, 1.0, 63.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2020, 'Dec', 8.0, 92.0, 36.0, 13.0, 2.0, 2.0, 40.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Row((2021, 'Jan', 21.0, 79.0, 7.0, 13.0, 4.0, 2.0, 54.0, 0.0), {'year': 0, 'month': 1, 'cat1': 2, 'cat2': 3, 'cat3': 4, 'cat4': 5, 'cat5': 6, 'cat6': 7, 'cat7': 8, 'other': 9})
Here's what I've got so far:
# First experiment: collect the distinct years in the result set and show them.
data = {row.year for row in query_job}
print(data)
# Second experiment: build a dict keyed by the year (as a string).
data = {}
# NOTE(review): indentation was lost in this paste; presumably every line down
# to the final `data[str(row.year)] = values` is the loop body -- confirm.
for row in query_job:
data[str(row.year)] = None
# A new empty list is created here on each pass through the loop ...
values = []
# One dict per row holding the month label and the category values.
monthly_data = {}
monthly_data["month"] = row.month
monthly_data["cat1"] = row.cat1
monthly_data["cat2"] = row.cat2
monthly_data["cat3"] = row.cat3
monthly_data["cat4"] = row.cat4
monthly_data["cat5"] = row.cat5
monthly_data["cat6"] = row.cat6
monthly_data["cat7"] = row.cat7
monthly_data["other"] = row.other
values.append(monthly_data)
# ... so this stores a one-element list, replacing whatever the year held
# before (which matches the "only the last entry per year" output shown below).
data[str(row.year)] = values
# Bare expression: displays the result in a notebook/REPL.
data
Problem is I'm only getting the last entries for each year like so:
{'2020': [{'month': 'Dec',
'cat1': 8.0,
'cat2': 92.0,
'cat3': 36.0,
'cat4': 13.0,
'cat5': 2.0,
'cat6': 2.0,
'cat7': 40.0,
'other': 0.0}],
'2021': [{'month': 'Jan',
'cat1': 21.0,
'cat2': 79.0,
'cat3': 7.0,
'cat4': 13.0,
'cat5': 4.0,
'cat6': 2.0,
'cat7': 54.0,
'other': 0.0}]}
The resulting data will eventually be written to a .json file.
Python google bigquery
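The issue is that values = [] and data[str(row.year)] = None
are executed again for every row, so data[str(row.year)] = values
replaces the year's list with a new one-element list instead of appending to it. Group the rows with a defaultdict(list) and append each one to its year's list: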
from collections import defaultdict

# Columns copied into each per-month record. "year" is deliberately left out:
# it becomes the key of the outer dict instead of a field of the record.
month_fields = (
    "month",
    "cat1",
    "cat2",
    "cat3",
    "cat4",
    "cat5",
    "cat6",
    "cat7",
    "other",
)

# defaultdict(list) creates the empty list the first time a year is seen, so
# each row is appended to its year's list instead of replacing it.
data = defaultdict(list)
for row in query_job:
    # Row objects support lookup by column name, so one comprehension replaces
    # the nine hand-written assignments.
    monthly_data = {field: row[field] for field in month_fields}
    # Keys are strings so the result matches the desired JSON layout.
    data[str(row.year)].append(monthly_data)

# Bare expression: displays the grouped result in a notebook/REPL. defaultdict
# is a dict subclass, so it can be passed to json.dump() as-is.
data
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.