[英]Python OS.WALK Remove Directories
我正在嘗試從 os.walk 中刪除目錄(我不需要這些目錄中的文件)
我的代碼:
def findit(root, exclude_files=[], exclude_dirs=[]):
    """Lazily produce paths of files under ``root`` that are not excluded.

    Args:
        root: Directory handed to ``os.walk``.
        exclude_files: Shell-style wildcards (``fnmatch`` syntax) matched
            against the case-normalised name of each file. An empty list
            excludes nothing.
        exclude_dirs: Directory paths that are compared, after
            ``normpath``/``normcase``, with every directory reported by
            ``os.walk``.

    Returns:
        A generator of ``os.path.join(directory, filename)`` strings.

    Note:
        Only files located *directly* inside an excluded directory are
        dropped. The walk itself is never pruned, so files that live in
        subdirectories of an excluded directory are still produced.
    """
    # The default lists are only read, never mutated, so sharing them
    # between calls is harmless.
    patterns = [fnmatch.translate(p) for p in exclude_files]
    # With no patterns the joined regex would be "()", which matches every
    # name and would therefore hide every file; skip filtering instead.
    if patterns:
        skip_file = re.compile('(' + ')|('.join(patterns) + ')').match
    else:
        skip_file = None
    skipped_dirs = {os.path.normcase(os.path.normpath(d)) for d in exclude_dirs}
    return (
        os.path.join(current, name)
        for current, _, names in os.walk(root)
        if os.path.normpath(os.path.normcase(current)) not in skipped_dirs
        for name in names
        if skip_file is None or not skip_file(os.path.normcase(name))
    )
它可以過濾文件,當我嘗試過濾掉 c:/windows 時,它仍然會從 windows 目錄中顯示我的文件,我是否遺漏了什么?
# Collect every file on drive C: that survives both filters.
filelist = list(
    findit(
        'c:/',
        exclude_files=['*.dll', '*.dat', '*.log', '*.exe'],
        exclude_dirs=['c:/windows', 'c:/program files', 'c:/else'],
    )
)
過濾目錄時,您並沒有阻止 os.walk() 繼續進入其子目錄。
您需要清空 dirs 列表,才能讓 os.walk() 不再向下遍歷:
def findit(root, exclude_files=[], exclude_dirs=[]):
    """Yield paths of files under ``root``, pruning excluded directories.

    Args:
        root: Directory handed to ``os.walk``.
        exclude_files: Shell-style wildcards (``fnmatch`` syntax) matched
            against the case-normalised name of each file. An empty list
            excludes nothing.
        exclude_dirs: Directory paths that are compared, after
            ``normpath``/``normcase``, with every directory reported by
            ``os.walk``. A matching directory is skipped together with
            everything below it.

    Yields:
        ``os.path.join(directory, filename)`` for every remaining file.
    """
    # The default lists are only read, never mutated, so sharing them
    # between calls is harmless.
    patterns = [fnmatch.translate(p) for p in exclude_files]
    # With no patterns the joined regex would be "()", which matches every
    # name and would therefore hide every file; skip filtering instead.
    if patterns:
        skip_file = re.compile('(' + ')|('.join(patterns) + ')').match
    else:
        skip_file = None
    skipped_dirs = {os.path.normcase(os.path.normpath(d)) for d in exclude_dirs}

    for current, dirs, files in os.walk(root):
        if os.path.normpath(os.path.normcase(current)) in skipped_dirs:
            # Exclude this dir and its subdirectories: os.walk reads this
            # very list to decide where to descend, so empty it in place.
            dirs[:] = []
            continue
        for name in files:
            if skip_file is None or not skip_file(os.path.normcase(name)):
                yield os.path.join(current, name)
dirs[:] = [] 這個賦值會就地清空列表,也就是移除其中所有的目錄名。
由於這個列表與 os.walk() 共享,而 os.walk() 隨後正是依據它來訪問子目錄,
因此這樣做可以有效阻止 os.walk() 進入這些子目錄。
看了上面的回復讓我很疑惑。 在我看來,其中缺少了 os.walk,而且 root 參數似乎也沒有按預期使用。 此外,任何一個可選參數為空列表時也都應該能正常工作。 下面是一個略作改動的版本:減少了命名空間查找,並且支持用通配符在每一層目錄中排除目錄:
import os
import re
import fnmatch
import os.path
def findit(root, exclude_files=[], exclude_dirs=[], exclude_dirs_wc=[]):
    """Generate all files found under root excluding some.

    Args:
        root: Directory handed to ``os.walk``.
        exclude_files: Unix shell-style wildcards; a file whose
            case-normalised name matches any of them is left out, in
            every directory.
        exclude_dirs: Directory paths without wildcards, assumed to be
            written starting at ``root`` so that they compare equal to the
            paths ``os.walk`` reports. A matching directory is skipped
            together with everything below it.
        exclude_dirs_wc: Unix shell-style wildcards applied to directory
            *names* at every level; matching directories are not entered.

    Any of the three exclusion arguments may be empty, in which case that
    filter is simply not applied.

    Yields:
        ``os.path.join(directory, filename)`` for every remaining file.
    """
    # Less namespace look-up inside the loop.
    join = os.path.join
    normpath = os.path.normpath
    normcase = os.path.normcase

    def compile_wildcards(wildcards):
        """Return a ``match`` callable for the wildcards, or None if empty."""
        translated = [fnmatch.translate(w) for w in wildcards]
        if not translated:
            return None
        return re.compile("({})".format(")|(".join(translated))).match

    skip_file = compile_wildcards(exclude_files)
    skip_dir_name = compile_wildcards(exclude_dirs_wc)
    # A private set: the caller's argument (or the shared default) is
    # never modified, even though entries are discarded below.
    pending_dirs = {normcase(normpath(d)) for d in exclude_dirs}

    for current, dirs, files in os.walk(root):
        current_dir = normpath(normcase(current))
        if current_dir in pending_dirs:
            # Prune the set of dirs still to be excluded.
            pending_dirs.discard(current_dir)
            # Disregard sub-directories. Must be done IN PLACE: os.walk
            # reads this very list to decide where to descend.
            dirs[:] = []
            continue
        if skip_dir_name is not None:
            # IN PLACE for the same reason as above.
            dirs[:] = [d for d in dirs if not skip_dir_name(normcase(d))]
        for name in files:
            if skip_file is None or not skip_file(normcase(name)):
                yield join(current, name)
您可以在使用 os.walk("pathName") 遍歷時使用關鍵字“continue”跳過迭代
# Visit every directory below pathName, passing over those whose path
# matches `pattern`. dirnames is left untouched here, so os.walk still
# descends into the subdirectories of a skipped directory.
for dirpath, dirnames, filenames in os.walk(pathName):
    # Write regular expression or a string to skip the desired folder
    dirpath_pat = re.search(pattern, dirpath)
    if dirpath_pat and dirpath_pat.group(0):
        # Non-empty match: go straight to the next directory.
        continue
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.