简体   繁体   中英

Fastest/safest file finding/parsing?

On C:, I have tens of thousands of *.foobar files. They're in all sorts of places (i.e., in subdirectories). These files are roughly 1–64 KB in size, and plaintext.

I have a class Foobar(string fileContents) that strongly types these .foobar files.

My challenge is to get a list of all the *.foobar files on C:, represented as an array of Foobar objects. What's the quickest way to do this?

I'm interested to find out whether there's a better way (there undoubtedly is) than my first approach, which follows, and whether this approach of mine has any potential problems (e.g., I/O concurrency issues throwing exceptions):

// Lazily walk every directory beneath rootPath looking for *.foobar files.
var files = Directory.EnumerateFiles(
    rootPath,
    "*.foobar",
    SearchOption.AllDirectories);

// Read each file and wrap its text in a Foobar, spreading the work across
// threads with PLINQ, then materialize the results into an array.
Foobar[] foobars = files
    .AsParallel()
    .Select(filePath => File.ReadAllText(filePath))
    .Select(contents => new Foobar(contents))
    .ToArray();

Because permission errors (or other errors) can apparently stop the enumeration dead in its tracks, you may want to implement your own enumerator something like this:

/// <summary>
/// Enumerates the paths of files matching a search pattern in a root directory and,
/// recursively, in all of its subdirectories. A subdirectory that fails to open, or
/// whose enumeration fails part-way through, is skipped: the exception is appended to
/// the error list instead of aborting the whole walk.
/// </summary>
/// <remarks>
/// Exceptions raised while opening the root directory itself are not recorded; they
/// propagate from <see cref="GetEnumerator"/> (and from <c>Reset</c>).
/// </remarks>
class SafeFileEnumerator : IEnumerable<string>
{
  private string root;
  private string pattern;
  // Shared with every nested enumerator so all failures end up in one list.
  private IList<Exception> errors;

  /// <summary>Creates an enumerator that collects errors in its own, new list.</summary>
  /// <param name="root">Directory to start searching from.</param>
  /// <param name="pattern">Search pattern passed to Directory.EnumerateFiles.</param>
  public SafeFileEnumerator(string root, string pattern)
  {
     this.root = root;
     this.pattern = pattern;
     this.errors = new List<Exception>();
  }

  /// <summary>Creates an enumerator that appends errors to a caller-supplied list.</summary>
  /// <param name="root">Directory to start searching from.</param>
  /// <param name="pattern">Search pattern passed to Directory.EnumerateFiles.</param>
  /// <param name="errors">List that receives every exception captured during the walk.</param>
  public SafeFileEnumerator(string root, string pattern, IList<Exception> errors)
  {
     this.root = root;
     this.pattern = pattern;
     this.errors = errors;
  }

  /// <summary>Returns a snapshot of the exceptions captured so far.</summary>
  public Exception[] Errors()
  {
     return errors.ToArray();
  }

  /// <summary>
  /// Walks one directory: first its own matching files, then each subdirectory in turn
  /// through a nested SafeFileEnumerator.
  /// </summary>
  class Enumerator : IEnumerator<string>
  {
     // Source of Current: either this directory's own files or a nested walk of a subdirectory.
     IEnumerator<string> fileEnumerator;
     // Immediate subdirectories of root that still have to be visited.
     IEnumerator<string> directoryEnumerator;
     string root;
     string pattern;
     IList<Exception> errors;

     public Enumerator(string root, string pattern, IList<Exception> errors)
     {
        this.root = root;
        this.pattern = pattern;
        this.errors = errors;
        Open();
     }

     /// <summary>
     /// Path of the current file.
     /// </summary>
     /// <exception cref="ObjectDisposedException">
     /// No file enumerator is active (disposed, exhausted, or the last directory failed).
     /// </exception>
     public string Current
     {
        get
        {
           if (fileEnumerator == null) throw new ObjectDisposedException("FileEnumerator");
           return fileEnumerator.Current;
        }
     }

     object System.Collections.IEnumerator.Current
     {
        get { return Current; }
     }

     /// <summary>Releases both underlying enumerators; safe to call more than once.</summary>
     public void Dispose()
     {
        ReleaseFiles();
        if (directoryEnumerator != null)
           directoryEnumerator.Dispose();
        directoryEnumerator = null;
     }

     /// <summary>
     /// Advances to the next matching file, descending into subdirectories once the
     /// current source is exhausted.
     /// </summary>
     /// <returns>true if Current now refers to a file; false when the walk is finished.</returns>
     public bool MoveNext()
     {
        if ((fileEnumerator != null) && AdvanceFiles())
           return true;
        while ((directoryEnumerator != null) && (directoryEnumerator.MoveNext()))
        {
           // Drop the exhausted (or failed) source before opening the next one, so a
           // failure below never leaves a disposed enumerator behind Current.
           ReleaseFiles();
           try
           {
              fileEnumerator = new SafeFileEnumerator(directoryEnumerator.Current, pattern, errors).GetEnumerator();
           }
           catch (Exception ex)
           {
              // This subdirectory could not be opened: record the failure and move on.
              errors.Add(ex);
              continue;
           }
           if (AdvanceFiles())
              return true;
        }
        Dispose();
        return false;
     }

     /// <summary>Restarts the walk from the root directory.</summary>
     public void Reset()
     {
        Dispose();
        Open();
     }

     // Creates both enumerators for root. If the directory enumerator cannot be created,
     // the already-created file enumerator is disposed before the exception propagates.
     private void Open()
     {
        fileEnumerator = System.IO.Directory.EnumerateFiles(root, pattern).GetEnumerator();
        try
        {
           directoryEnumerator = System.IO.Directory.EnumerateDirectories(root).GetEnumerator();
        }
        catch
        {
           ReleaseFiles();
           throw;
        }
     }

     // Advances the current file source. A failure while advancing is recorded and
     // treated as "no more files here" so the caller continues with the next directory.
     private bool AdvanceFiles()
     {
        try
        {
           return fileEnumerator.MoveNext();
        }
        catch (Exception ex)
        {
           errors.Add(ex);
           return false;
        }
     }

     // Disposes and clears the current file source, if any.
     private void ReleaseFiles()
     {
        if (fileEnumerator != null)
           fileEnumerator.Dispose();
        fileEnumerator = null;
     }
  }

  /// <summary>Starts a new walk of the root directory.</summary>
  public IEnumerator<string> GetEnumerator()
  {
     return new Enumerator(root, pattern, errors);
  }

  System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
  {
     return GetEnumerator();
  }
}

Great work! Here is an extension to your code that returns FileSystemInfo objects instead of string paths. It includes some minor changes, such as adding a SearchOption parameter (like the native .NET method has) and trapping errors when the root directory is first read, in case access to the root folder is denied. Thanks again for the original posting!

/// <summary>
/// Enumerates the file system entries (files and directories) whose names match a
/// pattern, starting at a root directory and optionally descending into every
/// subdirectory. Directories that cannot be read are skipped and the exception is
/// recorded in <see cref="Errors"/> instead of aborting the scan.
/// </summary>
public class SafeFileEnumerator : IEnumerable<FileSystemInfo>
{
    /// <summary>
    /// Starting directory to search from
    /// </summary>
    private DirectoryInfo root;

    /// <summary>
    /// Filter pattern
    /// </summary>
    private string pattern;

    /// <summary>
    /// Indicator if search is recursive or not
    /// </summary>
    private SearchOption searchOption;

    /// <summary>
    /// Any errors captured
    /// </summary>
    private IList<Exception> errors;

    /// <summary>
    /// Create an Enumerator that will scan the file system, skipping directories where access is denied
    /// </summary>
    /// <param name="root">Starting Directory</param>
    /// <param name="pattern">Filter pattern</param>
    /// <param name="option">Recursive or not</param>
    public SafeFileEnumerator(string root, string pattern, SearchOption option)
        : this(new DirectoryInfo(root), pattern, option)
    {}

    /// <summary>
    /// Create an Enumerator that will scan the file system, skipping directories where access is denied
    /// </summary>
    /// <param name="root">Starting Directory</param>
    /// <param name="pattern">Filter pattern</param>
    /// <param name="option">Recursive or not</param>
    public SafeFileEnumerator(DirectoryInfo root, string pattern, SearchOption option)
        : this(root, pattern, option, new List<Exception>())
    {}

    /// <summary>
    /// Internal constructor used by the recursive iterator so that nested scans share one error list.
    /// </summary>
    /// <param name="root">Starting Directory</param>
    /// <param name="pattern">Filter pattern; null or empty means "*"</param>
    /// <param name="option">Recursive or not</param>
    /// <param name="errors">List that receives every captured exception</param>
    /// <exception cref="ArgumentException">root is null or does not exist</exception>
    private SafeFileEnumerator(DirectoryInfo root, string pattern, SearchOption option, IList<Exception> errors)
    {
        if (root == null || !root.Exists)
        {
            throw new ArgumentException("Root directory is not set or does not exist.", "root");
        }
        this.root = root;
        this.searchOption = option;
        this.pattern = String.IsNullOrEmpty(pattern)
            ? "*"
            : pattern;
        this.errors = errors;
    }

    /// <summary>
    /// Errors captured while parsing the file system.
    /// </summary>
    public Exception[] Errors
    {
        get
        {
            return errors.ToArray();
        }
    }

    /// <summary>
    /// Helper class to enumerate the file system.
    /// </summary>
    private class Enumerator : IEnumerator<FileSystemInfo>
    {
        // Core enumerator that we will be walking through: either the matching entries
        // of root itself or a nested scan of one of its subdirectories
        private IEnumerator<FileSystemInfo> fileEnumerator;
        // Subdirectories of root that still have to be scanned
        private IEnumerator<DirectoryInfo> directoryEnumerator;

        private DirectoryInfo root;
        private string pattern;
        private SearchOption searchOption;
        private IList<Exception> errors;

        public Enumerator(DirectoryInfo root, string pattern, SearchOption option, IList<Exception> errors)
        {
            this.root = root;
            this.pattern = pattern;
            this.errors = errors;
            this.searchOption = option;

            Reset();
        }

        /// <summary>
        /// Current item the primary iterator is pointing to, or null when no iterator is
        /// active (the scan has finished, was disposed, or the root could not be read).
        /// </summary>
        public FileSystemInfo Current
        {
            get
            {
                // fileEnumerator is cleared at the end of the scan and when the root cannot
                // be read; report "no item" instead of failing on a null reference.
                return fileEnumerator == null ? null : fileEnumerator.Current;
            }
        }

        object System.Collections.IEnumerator.Current
        {
            get { return Current; }
        }

        /// <summary>
        /// Releases both underlying enumerators.
        /// </summary>
        public void Dispose()
        {
            Dispose(true, true);
        }

        /// <summary>
        /// Disposes and clears the selected enumerators.
        /// </summary>
        /// <param name="file">Release the entry enumerator</param>
        /// <param name="dir">Release the subdirectory enumerator</param>
        private void Dispose(bool file, bool dir)
        {
            if (file)
            {
                if (fileEnumerator != null)
                    fileEnumerator.Dispose();

                fileEnumerator = null;
            }

            if (dir)
            {
                if (directoryEnumerator != null)
                    directoryEnumerator.Dispose();

                directoryEnumerator = null;
            }
        }

        /// <summary>
        /// Advances to the next matching entry, descending into subdirectories when the
        /// search option asks for a recursive scan.
        /// </summary>
        /// <returns>true if Current refers to an entry; false when the scan is finished</returns>
        public bool MoveNext()
        {
            // Enumerate the files in the current folder
            if ((fileEnumerator != null) && (fileEnumerator.MoveNext()))
                return true;

            // Don't go recursive...
            if (searchOption == SearchOption.TopDirectoryOnly) { return false; }

            while ((directoryEnumerator != null) && (directoryEnumerator.MoveNext()))
            {
                // Drop the exhausted entry enumerator before opening the next folder
                Dispose(true, false);

                try
                {
                    fileEnumerator = new SafeFileEnumerator(
                        directoryEnumerator.Current,
                        pattern,
                        SearchOption.AllDirectories,
                        errors
                        ).GetEnumerator();
                }
                catch (Exception ex)
                {
                    // The folder could not be opened: record the failure and try the next one
                    errors.Add(ex);
                    continue;
                }

                // Open up the current folder file enumerator
                if (fileEnumerator.MoveNext())
                    return true;
            }

            Dispose(true, true);

            return false;
        }

        /// <summary>
        /// Restarts the scan at the root directory.
        /// </summary>
        public void Reset()
        {
            Dispose(true,true);

            // Safely get the enumerators, including in the case where the root is not accessible
            if (root != null)
            {
                try
                {
                    fileEnumerator = root.GetFileSystemInfos(pattern, SearchOption.TopDirectoryOnly).AsEnumerable<FileSystemInfo>().GetEnumerator();
                }
                catch (Exception ex)
                {
                    errors.Add(ex);
                    fileEnumerator = null;
                }

                try
                {
                    // Every subdirectory must be visited, not only those whose own name
                    // matches the filter pattern; otherwise a pattern such as "*.foobar"
                    // would never recurse into ordinarily named folders.
                    directoryEnumerator = root.GetDirectories("*", SearchOption.TopDirectoryOnly).AsEnumerable<DirectoryInfo>().GetEnumerator();
                }
                catch (Exception ex)
                {
                    errors.Add(ex);
                    directoryEnumerator = null;
                }
            }
        }
    }

    /// <summary>
    /// Starts a new scan of the root directory.
    /// </summary>
    public IEnumerator<FileSystemInfo> GetEnumerator()
    {
        return new Enumerator(root, pattern, searchOption, errors);
    }

    System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM