简体   繁体   中英

Copy directory structure and files excluding file types, sub folders… Is it possible using the standard library without writing a whole script?

I'm looking for a quick way to copy the entire directory structure (including sub folders and files), with the following conditions:

  1. Copy file if it does not exist in the destination or source is newer
  2. Allow excluding a list of sub folders, e.g. ['temp', '.git']
  3. Allow excluding files by type, e.g. ['*.txt', '*.pyc', '*.zip']

I have seen some of the answers using shutil.copy and copytree but none is doing what I was looking for...

I am hoping this could be done by using one of the standard utilities by providing arguments etc. If not, I will write a script to do it...

This is what I ended up writing... it does the job, I was hoping this basic functionality would be provided by one of the standard libraries...

import os, sys, pathlib, shutil

def copy_files_on_tree(srcdir, dstdir, sub_folder_to_include, extensions_to_include):
    """Copy selected files from ``srcdir`` into a mirrored tree under ``dstdir``.

    The source tree is walked with ``os.walk``.  Only directories whose own
    name is in ``sub_folder_to_include`` are processed (the walk still
    descends into the others).  Inside those, only regular files whose name
    ends with one of ``extensions_to_include`` are considered.  A file is
    copied with ``shutil.copy2`` when the destination is missing or is more
    than one second older than the source; otherwise it is skipped.  One
    line is printed for every copied or skipped file.

    Args:
        srcdir: Root of the tree to copy from.
        dstdir: Root of the tree to copy into; folders are created on demand.
        sub_folder_to_include: Container of directory names to process.
        extensions_to_include: A suffix string or an iterable of suffix
            strings (tuple, list, set, ...).
    """
    # Only the platform separator is normalised to '/'.  Blindly replacing
    # backslashes would corrupt POSIX names that legitimately contain one.
    src_root = str(pathlib.Path(srcdir)).replace(os.sep, '/')
    dst_root = str(pathlib.Path(dstdir)).replace(os.sep, '/')

    # str.endswith accepts only a str or a tuple of str.
    if isinstance(extensions_to_include, str):
        suffixes = extensions_to_include
    else:
        suffixes = tuple(extensions_to_include)

    for dirpath, _dirs, files in os.walk(src_root):
        this_dir = dirpath.replace(os.sep, '/')
        if os.path.basename(this_dir) not in sub_folder_to_include:
            continue

        # Map the folder by its path relative to the source root.  A plain
        # str.replace would also rewrite later occurrences of the source
        # string (e.g. 'a/aa' -> 'j/jj' for srcdir='a', dstdir='j').
        relative = os.path.relpath(this_dir, src_root)
        dest_dir = pathlib.Path(dst_root, relative).as_posix()

        # Create the folder in the destination if it does not exist.
        pathlib.Path(dest_dir).mkdir(parents=True, exist_ok=True)

        for filename in files:
            if not filename.endswith(suffixes):
                continue
            source_file = os.path.join(this_dir, filename)
            if not os.path.isfile(source_file):
                continue
            dest_file = os.path.join(dest_dir, filename)

            # A single stat call answers both "does it exist" and "how old
            # is it", avoiding a race between the check and the stat.
            try:
                dest_mtime = os.stat(dest_file).st_mtime
            except OSError:
                dest_mtime = None

            # Copy if the destination is missing or older by more than a second.
            if dest_mtime is None or os.stat(source_file).st_mtime - dest_mtime > 1:
                print(f'Copying {source_file} to {dest_dir}')
                shutil.copy2(source_file, dest_dir)
            else:
                print(f'.....Skipping {source_file} to {dest_dir}')

# Example run: the source and destination roots, then the folder names and
# file-name suffixes that are passed to copy_files_on_tree.
srcdir, dstdir = 'c:/temp/a', 'c:/temp/j'
sub_folder_to_include = ('a', 'aa', 'bb')
extensions_to_include = ('.py', '.png', '.gif', '.txt')

copy_files_on_tree(
    srcdir=srcdir,
    dstdir=dstdir,
    sub_folder_to_include=sub_folder_to_include,
    extensions_to_include=extensions_to_include,
)

This is the solution:

import os, sys, pathlib, shutil

def copy_files_on_tree(srcdir, dstdir, sub_folder_to_include, extensions_to_include):
    """Copy selected files from ``srcdir`` into a mirrored tree under ``dstdir``.

    The source tree is walked with ``os.walk``.  Only directories whose own
    name is in ``sub_folder_to_include`` are processed (the walk still
    descends into the others).  Inside those, only regular files whose name
    ends with one of ``extensions_to_include`` are considered.  A file is
    copied with ``shutil.copy2`` when the destination is missing or is more
    than one second older than the source; otherwise it is skipped.  One
    line is printed for every copied or skipped file.

    Args:
        srcdir: Root of the tree to copy from.
        dstdir: Root of the tree to copy into; folders are created on demand.
        sub_folder_to_include: Container of directory names to process.
        extensions_to_include: A suffix string or an iterable of suffix
            strings (tuple, list, set, ...).
    """
    # Only the platform separator is normalised to '/'.  Blindly replacing
    # backslashes would corrupt POSIX names that legitimately contain one.
    src_root = str(pathlib.Path(srcdir)).replace(os.sep, '/')
    dst_root = str(pathlib.Path(dstdir)).replace(os.sep, '/')

    # str.endswith accepts only a str or a tuple of str.
    if isinstance(extensions_to_include, str):
        suffixes = extensions_to_include
    else:
        suffixes = tuple(extensions_to_include)

    for dirpath, _dirs, files in os.walk(src_root):
        this_dir = dirpath.replace(os.sep, '/')
        if os.path.basename(this_dir) not in sub_folder_to_include:
            continue

        # Map the folder by its path relative to the source root.  A plain
        # str.replace would also rewrite later occurrences of the source
        # string (e.g. 'a/aa' -> 'j/jj' for srcdir='a', dstdir='j').
        relative = os.path.relpath(this_dir, src_root)
        dest_dir = pathlib.Path(dst_root, relative).as_posix()

        # Create the folder in the destination if it does not exist.
        pathlib.Path(dest_dir).mkdir(parents=True, exist_ok=True)

        for filename in files:
            if not filename.endswith(suffixes):
                continue
            source_file = os.path.join(this_dir, filename)
            if not os.path.isfile(source_file):
                continue
            dest_file = os.path.join(dest_dir, filename)

            # A single stat call answers both "does it exist" and "how old
            # is it", avoiding a race between the check and the stat.
            try:
                dest_mtime = os.stat(dest_file).st_mtime
            except OSError:
                dest_mtime = None

            # Copy if the destination is missing or older by more than a second.
            if dest_mtime is None or os.stat(source_file).st_mtime - dest_mtime > 1:
                print(f'Copying {source_file} to {dest_dir}')
                shutil.copy2(source_file, dest_dir)
            else:
                print(f'.....Skipping {source_file} to {dest_dir}')

# Example run: the source and destination roots, then the folder names and
# file-name suffixes that are passed to copy_files_on_tree.
srcdir, dstdir = 'c:/temp/a', 'c:/temp/j'
sub_folder_to_include = ('a', 'aa', 'bb')
extensions_to_include = ('.py', '.png', '.gif', '.txt')

copy_files_on_tree(
    srcdir=srcdir,
    dstdir=dstdir,
    sub_folder_to_include=sub_folder_to_include,
    extensions_to_include=extensions_to_include,
)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM