[英]Problem writing decimal data as avro using python
I am trying to write some decimal value as avro using python.我正在尝试使用 python 将一些十进制值写为 avro。 The code works fine without the decimal value.该代码在没有十进制值的情况下工作正常。 If I add the decimal value, I get an AvroTypeException: The datum {blah} is not an example of the schema {blah..blah}.如果我添加十进制值,我会得到 AvroTypeException:数据 {blah} 不是模式 {blah..blah} 的示例。 Here is my python code这是我的 python 代码
#trial with avro library
import avro.schema
import avro.io
import io
from decimal import *
from decimal import Decimal as D
schema = """{"name":"DEPARTMENT_111","type":"record","fields":[{"name":"DEPARTMENT_NAME","type":["null","string"],"default":null},{"name":"DEPARTMENT_ID","type":["null",{"type":"bytes","logicalType":"decimal","precision":38,"scale":10}]},{"name":"ETL_BATCH_SK","type":["null","long"],"default":null},{"name":"INSERT_TS","type":["null","string"],"default":null},{"name":"OP_CODE","type":["null","string"],"default":null},{"name":"PROCESSED_FLAG","type":["null","string"],"default":null}]}"""
print(format_json(json.loads(schema)))
parsed_schema = avro.schema.Parse(schema)
writer = avro.io.DatumWriter(parsed_schema)
bytes_writer = io.BytesIO()
encoder = avro.io.BinaryEncoder(bytes_writer)
class DecimalEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, D):
return float(obj)
return json.JSONEncoder.default(self, obj)
sample_department_data = {
"DEPARTMENT_NAME":"Physics",
"DEPARTMENT_ID" : decimal.Decimal("201.0"),
"ETL_BATCH_SK" : 952879684,
"OP_CODE":"I",
"PROCESSED_FLAG":"False"
}
sample_department_json = json.dumps(sample_department_data, cls=DecimalEncoder)
writer.write(sample_department_json, encoder)
However, I get the following error.但是,我收到以下错误。
---------------------------------------------------------------------------
AvroTypeException Traceback (most recent call last)
<ipython-input-42-d78ba6b385e2> in <module>()
47 }
48 sample_department_json = json.dumps(sample_department_data, cls=DecimalEncoder)
---> 49 writer.write(sample_department_json, encoder)
50
51 raw_bytes = bytes_writer.getvalue()
~/.pyenv/versions/3.6.0/lib/python3.6/site-packages/avro/io.py in write(self, datum, encoder)
815 # validate datum
816 if not Validate(self.writer_schema, datum):
--> 817 raise AvroTypeException(self.writer_schema, datum)
818
819 self.write_data(self.writer_schema, datum, encoder)
AvroTypeException: The datum {"DEPARTMENT_NAME": "Physics", "DEPARTMENT_ID": 201.0, "ETL_BATCH_SK": 952879684, "OP_CODE": "I", "PROCESSED_FLAG": "False"} is not an example of the schema {
"type": "record",
"name": "DEPARTMENT_111",
"fields": [
{
"type": [
"null",
"string"
],
"name": "DEPARTMENT_NAME",
"default": null
},
{
"type": [
"null",
{
"type": "bytes",
"logicalType": "decimal",
"precision": 38,
"scale": 10
}
],
"name": "DEPARTMENT_ID"
},
{
"type": [
"null",
"long"
],
"name": "ETL_BATCH_SK",
"default": null
},
{
"type": [
"null",
"string"
],
"name": "INSERT_TS",
"default": null
},
{
"type": [
"null",
"string"
],
"name": "OP_CODE",
"default": null
},
{
"type": [
"null",
"string"
],
"name": "PROCESSED_FLAG",
"default": null
}
]
}
What am I doing wrong?我究竟做错了什么?
I tried to see what could be wrong, but I don't see any problems.我试图看看可能出了什么问题,但我没有看到任何问题。 In fact, if I use fastavro
it works just fine as seen here:事实上,如果我使用fastavro
它就可以正常工作,如下所示:
from decimal import Decimal
import io
import json
import fastavro
schema = """{"name":"DEPARTMENT_111","type":"record","fields":[{"name":"DEPARTMENT_NAME","type":["null","string"],"default":null},{"name":"DEPARTMENT_ID","type":["null",{"type":"bytes","logicalType":"decimal","precision":38,"scale":10}]},{"name":"ETL_BATCH_SK","type":["null","long"],"default":null},{"name":"INSERT_TS","type":["null","string"],"default":null},{"name":"OP_CODE","type":["null","string"],"default":null},{"name":"PROCESSED_FLAG","type":["null","string"],"default":null}]}"""
parsed_schema = fastavro.parse_schema(json.loads(schema))
sample_department_data = {
"DEPARTMENT_NAME":"Physics",
"DEPARTMENT_ID" : Decimal("201.0"),
"ETL_BATCH_SK" : 952879684,
"OP_CODE":"I",
"PROCESSED_FLAG":"False"
}
bio = io.BytesIO()
fastavro.writer(bio, parsed_schema, [sample_department_data])
bio.seek(0)
print(list(fastavro.reader(bio)))
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.