[英]Appending items with a different schema to an existing Avro file using Python
我剛剛開始使用 Avro(和 Python)。 我想測試模式演化(schema evolution)。 我準備了 2 個模式:先使用第一個模式保存數據,然後添加新數據並使用第二個模式保存。 寫入時我沒有收到任何錯誤,但我無法反序列化數據。 我想我的用法是錯誤的。 我該如何將使用新模式的項目追加到現有文件中?
# Step 1: write two records to users.avro using the schema parsed from
# user.avsc, then read the file back and print every record.
schema = avro.schema.Parse(open('user.avsc', "r").read())
# "wb" creates/truncates users.avro; the writer is given the parsed schema.
writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema)
# The first record omits favorite_color; the second one supplies it.
writer.append({"name": "Anna", "favorite_number": 1})
writer.append({"name": "Jan", "favorite_number": 13, "favorite_color": "blue"})
writer.close()
# Read back without passing an explicit reader schema to DatumReader.
reader = DataFileReader(open("users.avro", "rb"), DatumReader())
for user in reader:
print (user)
reader.close()
{'name': 'Anna', 'favorite_number': 1, 'favorite_color': None}
{'name': 'Jan', 'favorite_number': 13, 'favorite_color': 'blue'}
# Step 2: reopen the same users.avro in append mode ("ab") with a new
# DataFileWriter built from the schema in user2.avsc, append two more
# records, then read the whole file back.
schema2 = avro.schema.Parse(open('user2.avsc', "r").read())
# NOTE(review): this is the problematic step. The bytes logged in the error
# below contain "avro.codec" / "avro.schema", which suggests a second file
# header was written after the existing data -- confirm against the avro
# DataFileWriter documentation.
writer = DataFileWriter(open("users.avro", "ab"), DatumWriter(), schema2)
# These records use fields (favorite_food, favorite_film) that do not appear
# in the records written in step 1.
writer.append({"name": "Eva", "favorite_number": 5, "favorite_food":"raclette"})
writer.append({"name": "Adam", "favorite_number": 122, "favorite_color": "black", "favorite_film": "Gone with the wind"})
writer.close()
# Reading back prints the first two records and then raises the
# UnicodeDecodeError shown in the traceback that follows.
reader = DataFileReader(open("users.avro", "rb"), DatumReader())
for user in reader:
print (user)
reader.close()
Invalid UTF-8 input bytes: b'\x01\x04\x14avro.codec\x08null\x16avro.schema\xf0\x05{"type": "record", "n'
{'name': 'Anna', 'favorite_number': 1, 'favorite_color': None}
{'name': 'Jan', 'favorite_number': 13, 'favorite_color': 'blue'}
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-128-cbc8ab11fe9f> in <module>
1 reader = DataFileReader(open("users.avro", "rb"), DatumReader())
----> 2 for user in reader:
3 print (user)
4 reader.close()
~\Anaconda3\lib\site-packages\avro\datafile.py in __next__(self)
524 self._read_block_header()
525
--> 526 datum = self.datum_reader.read(self.datum_decoder)
527 self._block_count -= 1
528 return datum
~\Anaconda3\lib\site-packages\avro\io.py in read(self, decoder)
487 if self.reader_schema is None:
488 self.reader_schema = self.writer_schema
--> 489 return self.read_data(self.writer_schema, self.reader_schema, decoder)
490
491 def read_data(self, writer_schema, reader_schema, decoder):
~\Anaconda3\lib\site-packages\avro\io.py in read_data(self, writer_schema, reader_schema, decoder)
532 return self.read_union(writer_schema, reader_schema, decoder)
533 elif writer_schema.type in ['record', 'error', 'request']:
--> 534 return self.read_record(writer_schema, reader_schema, decoder)
535 else:
536 fail_msg = "Cannot read unknown schema type: %s" % writer_schema.type
~\Anaconda3\lib\site-packages\avro\io.py in read_record(self, writer_schema, reader_schema, decoder)
732 readers_field = readers_fields_dict.get(field.name)
733 if readers_field is not None:
--> 734 field_val = self.read_data(field.type, readers_field.type, decoder)
735 read_record[field.name] = field_val
736 else:
~\Anaconda3\lib\site-packages\avro\io.py in read_data(self, writer_schema, reader_schema, decoder)
510 return decoder.read_boolean()
511 elif writer_schema.type == 'string':
--> 512 return decoder.read_utf8()
513 elif writer_schema.type == 'int':
514 return decoder.read_int()
~\Anaconda3\lib\site-packages\avro\io.py in read_utf8(self)
260 except UnicodeDecodeError as exn:
261 logger.error('Invalid UTF-8 input bytes: %r', input_bytes)
--> 262 raise exn
263
264 def check_crc32(self, bytes):
~\Anaconda3\lib\site-packages\avro\io.py in read_utf8(self)
257 input_bytes = self.read_bytes()
258 try:
--> 259 return input_bytes.decode('utf-8')
260 except UnicodeDecodeError as exn:
261 logger.error('Invalid UTF-8 input bytes: %r', input_bytes)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xf0 in position 30: invalid continuation byte
根據規範,一個 Avro 對象容器文件只能包含一個模式,因此不能把使用另一個模式寫入的數據追加到同一個文件中。
模式演化指的是:讀取器模式(reader schema)可以不同於寫入器模式(writer schema),但仍然能夠讀取用舊模式寫入的數據。
例如,您可以讀取一個不含 favorite_film(最喜歡的電影)字段的舊文件,只要讀取器模式為該字段定義了默認值(例如 “none”)。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.