[英]Python shutil pack a zip file and unzip it back EOF error
更新:簡答,不要將存檔打包到與文件源目錄相同的路徑,有問題的錯誤代碼如下
shutil.make_archive(zip_path, 'zip', tmpdir)
原問題:
我正在使用 shutil
打包和解壓一個 TensorFlow model 文件夾(我認為這個問題與 shutil
更相關)
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
def load_model_as_bytes(model):
    """Serialize *model* with tf.saved_model and return a generator of zip chunks.

    Returns a generator yielding ``ByteChunk`` objects (4 MiB each) over the
    zip archive of the saved model.

    Fixes vs. the original:
    - The archive is written to a *separate* temporary directory. Packing it
      into the same directory being archived makes ``make_archive`` read the
      partially written zip into itself, producing a corrupt archive that
      raises EOFError on unpack.
    - Removed a stray early ``return file_chunk_generator`` that referenced
      the name before it was bound (NameError).
    """
    def file_chunk_generate(file_path):
        # 4 MiB chunks keep memory bounded for large model files.
        CHUNK_SIZE = 4 * 1024 * 1024
        with open(file_path, 'rb') as f:
            while True:
                piece = f.read(CHUNK_SIZE)
                if not piece:
                    return
                yield ByteChunk(buffer=piece)

    tmpdir = tempfile.mkdtemp()
    tf.saved_model.save(model, tmpdir)
    # FIX: place the archive outside tmpdir so it is not archived into itself.
    zip_path = os.path.join(tempfile.mkdtemp(), "tf_model")
    shutil.make_archive(zip_path, 'zip', tmpdir)
    size = os.path.getsize(f'{zip_path}.zip')
    logging.info(f"send model file zip, length: {size}") #-------output 4621
    return file_chunk_generate(f'{zip_path}.zip')
class NeuralNetworkPart(Model):
    """Minimal Keras model: Dense(128, relu) hidden layer -> Dense(10) output."""

    def __init__(self):
        super().__init__()
        self.d1 = Dense(128, activation='relu')  # hidden layer
        self.d2 = Dense(10)  # output logits, no activation

    def call(self, x):
        # Input arrives wrapped in a sequence; presumably a single-element
        # list/tuple of tensors — TODO confirm against the caller.
        x = x[0]
        x = self.d1(x)
        return self.d2(x)
# Receiving side: obtain the chunk generator, stream the archive bytes to a
# fresh temp dir, then unpack. (This is where the original code raised
# EOFError — the archive itself was corrupt at creation time.)
model = NeuralNetworkPart()
it = load_model_as_bytes(model)
tmpdir = tempfile.mkdtemp()
zip_path = os.path.join(tmpdir, "tf_model.zip")
with open(zip_path, 'wb') as f:
    for byte_chunk in it:
        f.write(byte_chunk.buffer)
logging.info(f"receive model file zip, length: {os.path.getsize(zip_path)}") #-------output 4621
shutil.unpack_archive(zip_path, tmpdir)
基本上這個程序得到一個文件夾,使用make_archive
到zip 就可以了。 然后將zip文件以字節形式讀取,存入生成器變量中,使用生成器寫入另一個zip文件,嘗試使用unpack_archive
解壓。
在寫入字節生成器之前,在解包之前寫入 zip 文件之后,大小相同(在日志記錄中檢查),但是在調用解包時,它會引發 EOF 錯誤
shutil.unpack_archive(zip_path, tmpdir)
File "/lib/python3.6/shutil.py", line 983, in unpack_archive
func(filename, extract_dir, **kwargs)
File "/lib/python3.6/shutil.py", line 901, in _unpack_zipfile
data = zip.read(info.filename)
File "/lib/python3.6/zipfile.py", line 1338, in read
return fp.read()
File "/lib/python3.6/zipfile.py", line 858, in read
buf += self._read1(self.MAX_N)
File "/lib/python3.6/zipfile.py", line 940, in _read1
data += self._read2(n - len(data))
File "/lib/python3.6/zipfile.py", line 975, in _read2
raise EOFError
這個稍微簡化的版本似乎工作得很好。 請注意,這不會清除任何臨時文件; 你可能想在你的 tmpdir 充滿 TensorFlow 模型之前修復它。
import os
import shutil
import tempfile
def file_chunk_generate(file_path):
    """Yield the contents of *file_path* as successive 4 MiB byte chunks."""
    chunk_size = 4 * 1024 * 1024
    with open(file_path, "rb") as handle:
        # iter(callable, sentinel) stops at the first empty read (EOF).
        for piece in iter(lambda: handle.read(chunk_size), b""):
            yield piece
def get_zip_chunk_generator(source_dir):
    """Zip *source_dir* into a fresh staging directory and return a chunk generator.

    The archive is deliberately created *outside* source_dir so that
    make_archive never tries to include the archive in itself.
    """
    staging_dir = tempfile.mkdtemp("zip-")
    archive_path = shutil.make_archive(
        os.path.join(staging_dir, "tf_model"), "zip", source_dir
    )
    return file_chunk_generate(archive_path)
def make_source_dir():
    """Create a temp directory populated with five small test files; return its path."""
    source = tempfile.mkdtemp("src-")
    for index in range(5):
        file_path = os.path.join(source, f"test-{index}.txt")
        with open(file_path, "wb") as handle:
            handle.write(b"foo" * 1024)
    return source
# Demo: zip a populated source dir, stream its bytes to a new location,
# and unpack there — showing the round trip succeeds when the archive
# lives outside the directory being archived.
source_dir = make_source_dir()
it = get_zip_chunk_generator(source_dir)
dest_dir = tempfile.mkdtemp(prefix="dest-")
print("1", os.listdir(dest_dir))  # empty before transfer
zip_path = os.path.join(dest_dir, "tf_model_dest.zip")
with open(zip_path, "wb") as f:
    for byte_chunk in it:
        f.write(byte_chunk)
print("2", os.listdir(dest_dir))  # just the received zip
shutil.unpack_archive(zip_path, dest_dir)
print("3", os.listdir(dest_dir))  # zip plus the extracted test files
output 是
1 []
2 ['tf_model_dest.zip']
3 ['test-0.txt', 'test-1.txt', 'test-3.txt', 'test-2.txt', 'tf_model_dest.zip', 'test-4.txt']
如您所料。
如果您要通過網絡訪問 stream,我建議您使用 tarball(因為這樣確實可以在磁盤上完全沒有任何文件的情況下完成;ZIP 解壓需要 seek 支持,但 TAR 不需要)。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.