简体   繁体   中英

How can I accurately search a directory with elements from a text file?

I'm fairly new to Python. I'm trying to use a list made from a text file to search directories for files and folders. It's working to a certain extent, but only if the search element is at the start of the file/folder name, as I'm using .startswith().

The code I'm using is below.

I'm thinking regex might be the way to go but can't seem to figure it out.

Any help appreciated.

Thanks.

import os

def data_cleansing(path, *, list_file=r"Y:\Admin\Data Cleansing\DCList.txt",
                   match_anywhere=False):
    """Report folders and files under *path* whose names match a search term.

    Search terms are read from *list_file*, one per line. Matching folder
    names are printed first, then matching file names, followed by a summary
    line with the combined count.

    Args:
        path: Root directory to walk recursively.
        list_file: Text file holding one search term per line. Blank lines
            are ignored.
        match_anywhere: When False (the default) a name matches only if it
            starts with a term. When True a name matches if a term occurs
            anywhere inside it.

    Raises:
        OSError: If *list_file* cannot be opened.
    """
    with open(list_file, "r") as handle:
        # Blank lines must be dropped: an empty term is a prefix (and a
        # substring) of every name and would flag the whole tree.
        terms = tuple(
            line.rstrip("\n") for line in handle if line.strip()
        )

    def is_match(name):
        if match_anywhere:
            return any(term in name for term in terms)
        # str.startswith accepts a tuple; an empty tuple never matches.
        return name.startswith(terms)

    dirscount = 0
    matched_files = []

    # One pass over the tree. File hits are buffered so the output keeps the
    # "all folders first, then all files" order.
    for _root, dirnames, filenames in os.walk(path):
        for dirname in dirnames:
            if is_match(dirname):
                dirscount += 1
                print(dirname)
        matched_files.extend(name for name in filenames if is_match(name))

    for filename in matched_files:
        print(filename)

    total = dirscount + len(matched_files)
    print(total, "files and folders found in", path, "that need cleansing.")

# Scan each location in turn, printing a 70-asterisk rule between reports.
scan_roots = (
    r"G:\Processed\failed",
    r"G:\Processed\done",
    r"S:\Prosort",
    r"S:\Prosort_Archive",
)
for position, scan_root in enumerate(scan_roots):
    if position > 0:
        print("*" * 70)
    data_cleansing(scan_root)

Does this fit your needs?

from os import scandir
# Substrings to look for in each scanned file's full path.
find_list = ['Uni', '.doc', 'XML']
# Root directory to scan. In a raw string backslashes are already literal, so
# they must not be doubled; a raw string also cannot end in a single
# backslash, hence no trailing separator.
path = r"c:\Autoit\ForumTests"

def scantree(path):
    """Recursively yield a DirEntry for every non-directory entry under *path*.

    Sub-directories are descended into (symlinks to directories are not
    followed and are yielded as ordinary entries); the directories themselves
    are never yielded.

    Args:
        path: Directory to scan.

    Yields:
        os.DirEntry objects for files and other non-directory entries.
    """
    # The context manager closes the scandir iterator promptly, even when the
    # caller stops consuming this generator before it is exhausted.
    with scandir(path) as entries:
        for entry in entries:
            if entry.is_dir(follow_symlinks=False):
                yield from scantree(entry.path)
            else:
                yield entry

if __name__ == '__main__':
    # Print one line per (pattern, file) pair where the pattern occurs
    # anywhere in the file's full path.
    for found in scantree(path):
        full_path = found.path
        hits = [needle for needle in find_list if needle in full_path]
        for needle in hits:
            print(needle, '-->', full_path)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM