简体   繁体   中英

Recurse through directories and log files by file type in python

I want to investigate a set of directories and get the following info

  1. Number of files by file type
  2. List of files by filetype with full path

  3. Items 1 and 2 for each subdirectory

I have the following code. The part that builds the list of extensions works. I am stuck on how to create a separately named list and counter for each extension, since we don't know in advance what the extensions will be or how many there are. I am not sure what other issues may arise after this.

import os, sys, datetime

# First attempt from the question (Python 2). It collects every file path and
# every distinct extension under SourceDIR, then tries to create one counter
# and one list per extension by building variable names from strings. That
# last part is not valid Python; it is kept as written to show the problem.
top = os.getcwd() # change to a specific path if required.
RootOutput = top
SourceDIR = top
outDIR = top+"\\workingFiles" # directory where output is written to. Includes temp files
# END setting base paths
# NOTHING BELOW should need editing.
List =[]      # full path of every file found
extList=[]    # distinct extensions, in first-seen order

os.chdir(top)

# topdown=False: os.walk yields each directory after its subdirectories.
for root, dirs, files in os.walk(SourceDIR, topdown=False):
    for fl in files:
      currentFile=os.path.join(root, fl)
      # Slice from the last '.', so the extension keeps its leading dot.
      # If fl has no '.', rfind returns -1 and the slice is fl's last character.
      ext=fl[fl.rfind('.'):]
      if ext not in extList:
        extList.append(ext)
      List.append(currentFile)

print extList

# INVALID: an expression such as ext+"Counter" cannot be an assignment target,
# so the two lines below are syntax errors.
for ext in extList:
    ext+"Counter"=0
    ext+"FileList"=[]

for fl in List:
    ext=fl[fl.rfind('.'):]
    # INVALID: augmented assignment to an expression is a syntax error too.
    ext+"Counter"+=1
    # Parses as ext + ("FileList".append(fl)); str has no append method.
    ext+"FileList".append(fl)

for ext in extList:
    print ext
    # These print the concatenated strings themselves (e.g. ".txtCounter"),
    # not the value of a variable with that name.
    print ext+"Counter"
    print ext+"FileList"

Code updated as per the answer. There is still an issue with the log output: only one text file is created.

# Iterate over dictionary keys: report each extension and append its numbered
# file list to "<ext>_Log.txt" in the current working directory.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for elem in ext_dict:
    entry = ext_dict[elem]
    print(elem)
    print(entry["Counter"])
    print(entry["FileList"])
    # Mode 'a' appends, so a rerun adds to an existing log instead of
    # replacing it. "with" closes the handle even if a write raises.
    with open(elem+'_Log.txt', 'a') as log:
        # enumerate(..., 1) supplies the 1-based row number.
        for Num, fl in enumerate(entry["FileList"], 1):
            log.write(str(Num)+","+str(fl)+"\n")

The final script for anyone to use follows.

#-------------------------------------------------------------------------------
# Name:    File_Review
# Purpose: Review of all files in directory/subdirectories with report on file type and size
#
# Author:      georgec
#
# Created:     25/01/2013
# Copyright:   (c) ATGIS 2013
# Licence:     Creative Commons 3.0 - BY
#-------------------------------------------------------------------------------

import os, sys, datetime

# ---- User-editable settings -------------------------------------------------
top = os.getcwd() # change to a specific path if required.
RootOutput = top
SourceDIR = top
# Overrides the assignment above: this is the tree that actually gets scanned.
SourceDIR = r'P:\2013'
outDIR = top # directory where output is written to. Includes temp files
finalDIR = top+"\\final" # folder for final data only
# Each entry is passed to InvestigateFiles as DirLimiter, a substring a file's
# full path must contain. '' is contained in every path, so it filters nothing.
DirLimiterList=['']

# END setting base paths
# NOTHING BELOW should need editing.

# Make `top` the working directory so relative file names resolve against it.
os.chdir(top)

def InvestigateFiles(SourceDIR,outDIR,DirLimiter):
    """Walk SourceDIR and write one log file per file extension into outDIR.

    Every file below SourceDIR whose full path contains DirLimiter is grouped
    by extension (the text after the last '.' in the file name, without the
    dot). Files with no '.' in their name, or with nothing after it, are
    skipped. For each extension the function prints the extension, the file
    count and the file list, then appends one "<row>;<full path>;<size in
    bytes>" line per file to "<ext>_<tag>_Log.txt" in outDIR, where <tag> is
    the text between the first and last backslash of DirLimiter (empty when
    DirLimiter has no backslash).

    Parameters:
        SourceDIR:  root directory to scan recursively.
        outDIR:     directory that receives the log files; it must exist.
        DirLimiter: substring a file's full path must contain to be logged;
                    '' matches every file.

    Returns:
        dict mapping extension -> {"Counter": int, "FileList": [paths]}.

    Raises:
        IOError/OSError from open() or os.path.getsize() (e.g. outDIR is
        missing, or a file vanished after the walk).

    Notes:
        Log files are opened in append mode, so a rerun adds to existing logs.
        Works unchanged on Python 2.6+ and Python 3.
    """
    extList=[]      # extensions in first-seen order, for stable output
    ext_dict = {}

    # Collect the matching files, grouped by extension.
    for root, dirs, files in os.walk(SourceDIR, topdown=False):
        for fl in files:
            # rpartition says whether a '.' was found at all; a bare
            # fl[fl.rfind('.')+1:] would return the whole name for "README".
            stem, dot, ext = fl.rpartition('.')
            if not dot or ext=='':
                continue
            currentFile=os.path.join(root, fl)
            if DirLimiter not in currentFile:
                continue
            if ext not in ext_dict:
                # Only extensions with at least one logged file get an entry,
                # so no empty log files are created.
                ext_dict[ext] = {"Counter": 0, "FileList": []}
                extList.append(ext)
            ext_dict[ext]["Counter"] += 1
            ext_dict[ext]["FileList"].append(currentFile)

    print(extList)

    # The tag is the same for every extension, so compute it once.
    limiterTag=DirLimiter[DirLimiter.find('\\')+1:DirLimiter.rfind('\\')]

    # Report and log each extension.
    for elem in extList:
        entry=ext_dict[elem]
        print(elem)
        print(entry["Counter"])
        print(entry["FileList"])
        logPath=os.path.join(outDIR, elem+'_'+limiterTag+'_Log.txt')
        # "with" closes the log even if getsize() or write() raises.
        with open(logPath, 'a') as log:
            for Num, fl in enumerate(entry["FileList"], 1):
                log.write(str(Num)+";"+str(fl)+";"+str(os.path.getsize(fl))+"\n")

    return ext_dict

# Run the investigation once for every configured path limiter.
for DirLimiter in DirLimiterList:
    InvestigateFiles(SourceDIR, outDIR, DirLimiter)

You should use a dictionary for storing the data:

ext_dict = {}

# Create the dictionary: one record per extension found earlier.
for ext in extList:
    ext_dict[ext] = {"Counter": 0, "FileList": []}

# Populate the dictionary.
for fl in List:
    # Must be the same slice that was used to build extList (it keeps the
    # leading '.'), otherwise the keys will not match.
    ext = fl[fl.rfind('.'):]
    # `in` replaces dict.has_key(), which Python 3 removed.
    if ext in ext_dict:
        ext_dict[ext]["Counter"] += 1
        ext_dict[ext]["FileList"].append(fl)

# Iterate over dictionary keys.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
for elem in ext_dict:
    print(elem)
    print(ext_dict[elem]["Counter"])   # key is "Counter", capital C
    print(ext_dict[elem]["FileList"])

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM