简体   繁体   中英

Referenced variable 'ro_sub_ros.$is_not_null' has levels of 1, while the corresponding field path to Parquet column has 0 repeated fields

BigQuery Python: google.api_core.exceptions.BadRequest: 400 Error while reading data, error message: Schema mismatch: referenced variable 'ro_sub_ros.$is_not_null' has array levels of 1, while the corresponding field path to Parquet column has 0 repeated fields.

My original data looks like this:

testData = {
    "ro_user_email": "tech@techietech.com",
    "ro_account_id": "23402042",
    "ro_sub_account_id": "34020334",
    "ro_name": "Test RO",
    "ro_number": "1304340",
    "ro_currency": {"label":"USD","value":"USD"},
    "ro_dates": {"from":now,"to":now},
    "ro_status": "draft",
    "ro_operation_timestamp": pd.Timestamp(now),
    "ro_billing_cycle": {"label":"Fortnightly","value":"Fortnightly"},
    "ro_sub_ros": [
        {
            "sub_ro_id": "2323",
            "valid":False,
            "sub_ro_name": "Testing",
            "sub_ro_dates":{"from":now,"to":now},
            "sub_ro_budget": 1203302.22,
            "sub_ro_revenue_price":1202302.22,
            "sub_ro_revenue_selected": {"label":"Fortnightly","value":"Fortnightly"},
            "sub_ro_revenue_model_selected": {"label":"Fortnightly","value":"Fortnightly"},
            "sub_ro_campaigns_selected": [{"label":"Fortnightly","value":"Fortnightly"}],
            "sub_ro_ios_selected": [{"label":"Fortnightly","value":"Fortnightly"}],
            "sub_ro_client_id": [{"label":"Fortnightly","value":"Fortnightly"}],
            "sub_ro_ids_selected": [{"label":"Fortnightly","value":"Fortnightly"}],
            "sub_ro_pixels_selected": [{"label":"Fortnightly","value":"Fortnightly"}],
            "kpi_1_metric_selected": {"label":"Fortnightly","value":"Fortnightly"},
            "attribution_model_selected": {"label":"Fortnightly","value":"Fortnightly"},
            "kpi_window_selected": {"label":"Fortnightly","value":"Fortnightly"},
            "deepMetrics_selected": {"label":"Fortnightly","value":"Fortnightly"},
            "sub_ro_kpi_goal":"ROI"

        }
    ],

}

And here's how I created my BQ Schema:

schema = [
        bigquery.SchemaField("ro_user_email", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("ro_account_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("ro_sub_account_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("ro_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("ro_number", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("ro_currency", 
        "STRUCT", 
        mode="REQUIRED",
        fields=[
            bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("value", "STRING", mode="REQUIRED"),
            ]
            ),

        bigquery.SchemaField("ro_dates", 
        "STRUCT", 
        mode="REQUIRED",
        fields=[
            bigquery.SchemaField("from", "DATE", mode="REQUIRED"),
            bigquery.SchemaField("to", "DATE", mode="REQUIRED"),
            ]
            ),

        bigquery.SchemaField("ro_status","STRING", mode="REQUIRED"),
        bigquery.SchemaField("ro_operation_timestamp","TIMESTAMP", mode="REQUIRED"),

        bigquery.SchemaField("ro_billing_cycle", 
        "STRUCT", 
        mode="REQUIRED",
        fields=[
            bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("value", "STRING", mode="REQUIRED"),
            ]
            ),
        
        bigquery.SchemaField(
        "ro_sub_ros",
        "RECORD",
        mode="REPEATED",
        fields=[
            bigquery.SchemaField("sub_ro_id", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("valid", "BOOL", mode="REQUIRED"),
            bigquery.SchemaField("sub_ro_name", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("sub_ro_dates", "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("from", "DATE", mode="REQUIRED"),
                bigquery.SchemaField("to", "DATE", mode="REQUIRED"),
                ]
                ),
            bigquery.SchemaField("sub_ro_budget", "FLOAT", mode="REQUIRED"),
            bigquery.SchemaField("sub_ro_revenue_price", "FLOAT", mode="REQUIRED"),
            bigquery.SchemaField("sub_ro_revenue_selected",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),

            ]
            ),
            bigquery.SchemaField("sub_ro_revenue_model_selected",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),

            ]
            ),
            bigquery.SchemaField("sub_ro_campaigns_selected","RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("model_list",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),
            ]
            )
            ]),

            bigquery.SchemaField("sub_ro_ios_selected","RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("model_list",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),
            ]
            )
            ]),

            bigquery.SchemaField("sub_ro_client_id","RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("model_list",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),
            ]
            )
            ]),

            #

            bigquery.SchemaField("sub_ro_ids_selected","RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("model_list",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),
            ]
            )
            ]),

            bigquery.SchemaField("sub_ro_pixels_selected","RECORD",
            mode="REPEATED",
            fields=[
                bigquery.SchemaField("model_list",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),
            ]
            )
            ]),

            bigquery.SchemaField("kpi_1_metric_selected",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),

            ]
            ),

            bigquery.SchemaField("attribution_model_selected",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),

            ]
            ),

            bigquery.SchemaField("kpi_window_selected",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),

            ]
            ),

            bigquery.SchemaField("deepMetrics_selected",
            "STRUCT", mode="REQUIRED",
            fields=[
                bigquery.SchemaField("label", "STRING", mode="REQUIRED"),
                bigquery.SchemaField("value", "STRING", mode="REQUIRED"),

            ]
            ),

            bigquery.SchemaField("sub_ro_kpi_goal", "STRING", mode="REQUIRED"),




        ],
    )
    ]

When I try to upload this data using bigquery client library I get this error:

job_config = bigquery.LoadJobConfig(schema=schema)
    return bq.client.load_table_from_dataframe(
        df, tablename, job_config=job_config
    ).result()

throws:

google.api_core.exceptions.BadRequest: 400 Error while reading data, error message: Schema mismatch: referenced variable 'ro_sub_ros.$is_not_null' has array levels of 1, while the corresponding field path to Parquet column has 0 repeated 
fields.

Not sure what's going wrong here, In case my schema is too big and bulky to analyze, can someone show a minimal example of uploading a REPEATED RECORD in google bigquery using client library and pandas data frame?

you can consider validating these options.

Validate the BigQuery schema is correct, this is an example, using repeating records. You can see official documentation .

# from google.cloud import bigquery
# client = bigquery.Client()
# project = client.project
# dataset_ref = bigquery.DatasetReference(project, 'my_dataset')
 
schema = [
    bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
    bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
    bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
    bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
    bigquery.SchemaField(
        "addresses",
        "RECORD",
        mode="REPEATED",
        fields=[
            bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
        ],
    ),
]
table_ref = dataset_ref.table("my_table")
table = bigquery.Table(table_ref, schema=schema)
table = client.create_table(table)  # API request
print("Created table {}".format(table.full_table_id))

Validate the records syntax is correct. Here is an example with values of the schema.

{"id":"1","first_name":"John","last_name":"Doe","dob":"1968-01-22","addresses":[{"status":"current","address":"123 First Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456 Main Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]}

{"id":"2","first_name":"Jane","last_name":"Doe","dob":"1980-10-16","addresses":[{"status":"current","address":"789 Any Avenue","city":"New York","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321 Main Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]}

Consider using the “autodetect schema” in your python code. Similar to this example. You can see more documentation .

from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
# table_id = "your-project.your_dataset.your_table_name

# Set the encryption key to use for the destination.
# TODO: Replace this key with a key you have created in KMS.
# kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
#     "cloud-samples-tests", "us", "test", "test"
# )
job_config = bigquery.LoadJobConfig(
    autodetect=True, source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
)
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json"
load_job = client.load_table_from_uri(
    uri, table_id, job_config=job_config
)  # Make an API request.
load_job.result()  # Waits for the job to complete.
destination_table = client.get_table(table_id)
print("Loaded {} rows.".format(destination_table.num_rows))

You can validate the JSON format in this page .

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM