简体   繁体   中英

How to efficiently check if a folder contains a list of files?

I want to check whether all of the files (B01 through B12) are present in a certain folder. If they are, the function should return True. I know the end of each filename, but the beginning can vary.

Currently, I have the following code. It works, but I feel that it could be done a lot more efficiently. Does anyone have an idea of how to improve it?

def Check3(filename, root):
    """Return True if every expected band file exists under ``root/filename``.

    The folder is searched recursively. A band counts as present when at
    least one file name ends with its suffix (for example ``_B01.jp2``);
    the beginning of the file name may be anything.

    Args:
        filename: Name of the folder, relative to ``root``, to search.
        root: Directory that contains that folder.

    Returns:
        True when all 13 band suffixes were found, otherwise False
        (including when the folder is empty or does not exist).
    """
    import os  # local import keeps the function self-contained

    band_suffixes = (
        '_B01.jp2', '_B02.jp2', '_B03.jp2', '_B04.jp2', '_B05.jp2',
        '_B06.jp2', '_B07.jp2', '_B08.jp2', '_B8A.jp2', '_B09.jp2',
        '_B10.jp2', '_B11.jp2', '_B12.jp2',
    )
    # Every suffix has the same length, so one slice per file name is enough.
    suffix_len = len(band_suffixes[0])
    missing = set(band_suffixes)

    path = os.path.join(root, filename)
    # Deliberately no os.chdir(): changing the process-wide working directory
    # would make a relative `path` resolve against itself inside os.walk().
    for _dirpath, _dirnames, names in os.walk(path):
        for name in names:
            missing.discard(name[-suffix_len:])
        if not missing:
            # Everything was found; no need to walk the rest of the tree.
            return True

    # Starting from the full set and removing matches means an absent band
    # yields False instead of an unbound-variable error.
    return not missing

You can use pathlib to collect all the files, extract the last 8 characters of each file name, build the set of expected suffixes, and finally compare the two.

from pathlib import Path

# Collect the last 8 characters of every .jp2 file name found anywhere
# below the directory (rglob searches recursively).
all_last8 = {
    jp2_file.name[-8:]
    for jp2_file in Path(r'your directory').rglob('*.jp2')
}

# Expected suffixes, written out by hand: more verbose, but no slower at
# run time than generating them.
expected = {'_B01.jp2', '_B02.jp2', '_B03.jp2'}  # ...
# If the suffixes all follow one numeric pattern they can be generated, e.g.
# '_B' + zero-padded number + '.jp2' for the numbers 1 through 12.

# Valid when every expected suffix was seen at least once.
valid = expected.issubset(all_last8)
print(valid)

The code first collects all the file names and their suffixes; there could be more efficient approaches that compare while globbing.

You could use the glob library; it lists the files that match a given pattern under the folders you want to check.

from glob import glob

def Check3(root):
    """Return True if every expected band file exists one level below ``root``.

    Only ``.jp2`` files located directly inside an immediate subfolder of
    ``root`` are considered (pattern ``root/*/*.jp2``). A band counts as
    present when at least one of those paths ends with its suffix.

    Args:
        root: Directory whose immediate subfolders are searched.

    Returns:
        True when all required suffixes were found, otherwise False.
    """
    # Suffixes that must each be matched by at least one file.
    required_suffixes = (
        '_B01.jp2', '_B02.jp2', '_B03.jp2', '_B04.jp2', '_B05.jp2',
        '_B06.jp2', '_B07.jp2', '_B08.jp2', '_B8A.jp2', '_B09.jp2',
        '_B10.jp2', '_B11.jp2', '_B12.jp2',
    )
    suffix_len = len(required_suffixes[0])  # all suffixes share one length

    # Compare the END of each path with the suffixes: a full path is never
    # equal to a bare suffix, so a plain membership test could never match.
    found_suffixes = {
        path[-suffix_len:] for path in glob('{}/*/*.jp2'.format(root))
    }

    # Set comparison is unaffected by several files sharing one suffix,
    # unlike counting matches.
    return found_suffixes.issuperset(required_suffixes)
import wizzi_utils as wu  # pip install wizzi_utils


def check_if_sequential(dir_path: str, files_suffix: list) -> bool:
    """Report whether every suffix is matched by some file in ``dir_path``.

    Prints a numbered listing of the files found, then a verdict. Checking
    stops at the first suffix that no file ends with.

    Args:
        dir_path: Folder to inspect.
        files_suffix: Suffixes that must each terminate at least one file.

    Returns:
        True if all suffixes were matched, False otherwise.
    """
    # NOTE(review): presumably an empty file_suffix makes wizzi_utils return
    # every file in the folder -- confirm against the wizzi_utils docs.
    files_in_dir = wu.find_files_in_folder(dir_path=dir_path, file_suffix='')

    print('files_in_dir:')
    for position, found_path in enumerate(files_in_dir, start=1):
        print('\t{}: {}'.format(position, found_path))

    for suffix in files_suffix:
        if not any(candidate.endswith(suffix) for candidate in files_in_dir):
            # The first unmatched suffix settles the answer.
            print('suffix {} not found'.format(suffix))
            print('not all files found')
            return False

    print('all files with suffix given found in folder')
    return True


def main() -> None:
    """Run the suffix check on ``./my_files`` for the 13 band suffixes."""
    # The order matters: the first missing suffix in this list is the one
    # that gets reported.
    band_suffixes = [
        '_B01.jp2', '_B02.jp2', '_B03.jp2', '_B04.jp2', '_B05.jp2', '_B06.jp2', '_B07.jp2',
        '_B08.jp2', '_B8A.jp2', '_B09.jp2', '_B10.jp2', '_B11.jp2', '_B12.jp2',
    ]
    # The boolean result is not needed here; the check prints its own report.
    check_if_sequential(dir_path='./my_files', files_suffix=band_suffixes)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

If a file for every suffix is in the folder (plus one extra file that we don't need), the output will be:

files_in_dir:
    1: D:/workspace/2021wizzi_utils/temp/my_files/bla_B01.jp2
    2: D:/workspace/2021wizzi_utils/temp/my_files/bla_B02.jp2
    3: D:/workspace/2021wizzi_utils/temp/my_files/bla_B03.jp2
    4: D:/workspace/2021wizzi_utils/temp/my_files/bla_B04.jp2
    5: D:/workspace/2021wizzi_utils/temp/my_files/bla_B06.jp2
    6: D:/workspace/2021wizzi_utils/temp/my_files/bla_B07.jp2
    7: D:/workspace/2021wizzi_utils/temp/my_files/bla_B08.jp2
    8: D:/workspace/2021wizzi_utils/temp/my_files/bla_B09.jp2
    9: D:/workspace/2021wizzi_utils/temp/my_files/bla_B10.jp2
    10: D:/workspace/2021wizzi_utils/temp/my_files/bla_B11.jp2
    11: D:/workspace/2021wizzi_utils/temp/my_files/bla_B12.jp2
    12: D:/workspace/2021wizzi_utils/temp/my_files/bla_B8A.jp2
    13: D:/workspace/2021wizzi_utils/temp/my_files/random_file.txt
    14: D:/workspace/2021wizzi_utils/temp/my_files/x_B05.jp2
all files with suffix given found in folder

Now delete one file and rerun. I deleted bla_B06.jp2, so the output will be:

files_in_dir:
    1: D:/workspace/2021wizzi_utils/temp/my_files/bla_B01.jp2
    2: D:/workspace/2021wizzi_utils/temp/my_files/bla_B02.jp2
    3: D:/workspace/2021wizzi_utils/temp/my_files/bla_B03.jp2
    4: D:/workspace/2021wizzi_utils/temp/my_files/bla_B04.jp2
    5: D:/workspace/2021wizzi_utils/temp/my_files/bla_B07.jp2
    6: D:/workspace/2021wizzi_utils/temp/my_files/bla_B08.jp2
    7: D:/workspace/2021wizzi_utils/temp/my_files/bla_B09.jp2
    8: D:/workspace/2021wizzi_utils/temp/my_files/bla_B10.jp2
    9: D:/workspace/2021wizzi_utils/temp/my_files/bla_B11.jp2
    10: D:/workspace/2021wizzi_utils/temp/my_files/bla_B12.jp2
    11: D:/workspace/2021wizzi_utils/temp/my_files/bla_B8A.jp2
    12: D:/workspace/2021wizzi_utils/temp/my_files/random_file.txt
    13: D:/workspace/2021wizzi_utils/temp/my_files/x_B05.jp2
suffix _B06.jp2 not found
not all files found

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM