简体   繁体   中英

How do I create a zip file of a file path using Python, including empty directories?

I've been trying to use the zipfile and shutil.make_archive modules to recursively create a zip file of a directory. Both modules work great--except empty directories do not get added to the archive. Empty directories containing other empty directories are also silently skipped.

I can use 7Zip to create an archive of the same path and empty directories are preserved. Therefore I know this is possible within the file format itself. I just don't know how to do it within Python. Any ideas? Thanks!

There is a example using zipfile:

import os, zipfile  
from os.path import join  
def zipfolder(foldername, filename, includeEmptyDIr=True):   
    empty_dirs = []  
    zip = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED)  
    for root, dirs, files in os.walk(foldername):  
        empty_dirs.extend([dir for dir in dirs if os.listdir(join(root, dir)) == []])  
        for name in files:  
            zip.write(join(root ,name))  
        if includeEmptyDIr:  
            for dir in empty_dirs:  
                zif = zipfile.ZipInfo(join(root, dir) + "/")  
                zip.writestr(zif, "")  
        empty_dirs = []  
    zip.close() 

if __name__ == "__main__":
    zipfolder('test1/noname/', 'zip.zip')

This is lifted from Adding folders to a zip file using python but is the only function I have tried that works. The one listed as the answer does not work under Python 2.7.3 (doesn't copy empty directories and is inefficient). The following is tried and tested:

#!/usr/bin/python
import os
import zipfile

def zipdir(dirPath=None, zipFilePath=None, includeDirInZip=True):

    if not zipFilePath:
        zipFilePath = dirPath + ".zip"
    if not os.path.isdir(dirPath):
        raise OSError("dirPath argument must point to a directory. "
        "'%s' does not." % dirPath)
    parentDir, dirToZip = os.path.split(dirPath)
    #Little nested function to prepare the proper archive path
    def trimPath(path):
        archivePath = path.replace(parentDir, "", 1)
        if parentDir:
            archivePath = archivePath.replace(os.path.sep, "", 1)
        if not includeDirInZip:
            archivePath = archivePath.replace(dirToZip + os.path.sep, "", 1)
        return os.path.normcase(archivePath)

    outFile = zipfile.ZipFile(zipFilePath, "w",
        compression=zipfile.ZIP_DEFLATED)
    for (archiveDirPath, dirNames, fileNames) in os.walk(dirPath):
        for fileName in fileNames:
            filePath = os.path.join(archiveDirPath, fileName)
            outFile.write(filePath, trimPath(filePath))
        #Make sure we get empty directories as well
        if not fileNames and not dirNames:
            zipInfo = zipfile.ZipInfo(trimPath(archiveDirPath) + "/")
            #some web sites suggest doing
            #zipInfo.external_attr = 16
            #or
            #zipInfo.external_attr = 48
            #Here to allow for inserting an empty directory.  Still TBD/TODO.
            outFile.writestr(zipInfo, "")
    outFile.close()

You'll need to register a new archive format to do that, since the default ZIP archiver does not support that. Take a look at the meat of the existing ZIP archiver . Make your own archiver that creates directories using that currently-unused dirpath variable. I looked for how to create an empty directory and found this :

zip.writestr(zipfile.ZipInfo('empty/'), '')

With that, you should be able to write the necessary code to make it archive empty directories.

(I have still no reputation and can't comment directly)

Suggesting edit to James's answer,

Either change: 'def zipdir(dirPath=None, zipFilePath=None,....' to: 'def zipdir(dirPath,zipFilePath=None,...' Or remove the 'if not os.path.isdir(dirPath):' condition and it's body

Otherwise you'll have issues because of this inconsistency.

def zip_dir(src_dir, dst_zip, *, skip_suffixes=None, dry=False):
    import logging
    from pathlib import Path
    from os import walk
    from tempfile import TemporaryDirectory
    from zipfile import ZipFile, ZipInfo

    _log = logging.getLogger(zip_dir.__name__)
    _log.addHandler(logging.NullHandler())
    _sep = 50 * "-"

    skip_suffixes = skip_suffixes or []
    src_dir, dst_zip = Path(src_dir), Path(dst_zip)
    _log.info("zipping dir: '%s' to: '%s", str(src_dir), str(dst_zip))

    if not src_dir.exists():
        raise FileNotFoundError(str(src_dir))
    if not src_dir.is_dir():
        raise NotADirectoryError(str(src_dir))
    if dst_zip.exists():
        raise FileExistsError(str(dst_zip))

    with TemporaryDirectory() as tmp_dir:
        tmp_zip_path = Path(tmp_dir).joinpath(dst_zip.name)

        with ZipFile(str(tmp_zip_path), mode="w") as zip_out:
            for root, dirs, files in walk(src_dir):
                root = Path(root)

                for folder in dirs:
                    folder = root.joinpath(folder)

                    # add empty folders to the zip
                    if not list(folder.iterdir()):
                        _log.debug(_sep)
                        folder_name = f"{str(folder.relative_to(src_dir))}/"
                        _log.debug("empty dir: '%s'", folder_name)

                        if dry:
                            continue

                        zip_out.writestr(ZipInfo(folder_name), "")

                for file in files:
                    file = root.joinpath(file)
                    _log.debug(_sep)
                    _log.debug("adding:  '%s'", str(file))

                    should_skip = None
                    for suffix in file.suffixes:
                        if suffix in skip_suffixes:
                            should_skip = suffix
                            break

                    if should_skip:
                        _log.debug("skipped [%s]: %s", should_skip, str(file))
                        continue

                    arcname = str(file.relative_to(src_dir))
                    _log.debug("arcname: '%s'", arcname)

                    if dry:
                        continue

                    zip_out.write(str(file), arcname=arcname)

        if not dry:
            dst_zip.write_bytes(tmp_zip_path.read_bytes())

        tmp_zip_path.unlink()

if __name__ == '__main__':
    import logging
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s | %(levelname)8s | %(module)25s:%(lineno)-5s | %(message)s")

    zip_dir("/tmp/opera_profile", "opera_profile.zip", skip_suffixes=[".log"], dry=True)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM