简体   繁体   中英

Walking a directory with Node.js

I've got a problem with this code in node.js. I want to recursively walk through a directory tree and apply the callback action to every file in the tree. This is my code at the moment:

var fs = require("fs");

// General function
// Recursively walks `dir` and reports through `action(error, file)`: it is called
// with the error when fs.readdir fails, and with (null, path) for every entry that
// is not detected as a directory.
var dive = function (dir, action) {
  // Assert that it's a function
  // (anything else is replaced by a no-op so the calls below cannot fail)
  if (typeof action !== "function")
    action = function (error, file) { };

  // Read the directory
  fs.readdir(dir, function (err, list) {
    // Return the error if something went wrong
    if (err)
      return action(err);

    // For every file in the list
    list.forEach(function (file) {
      // Full path of that file
      // NOTE(review): `path` is assigned without var/let/const, so it is not local
      // to this callback; all iterations write to the same variable, and the
      // asynchronous fs.stat callbacks below read whatever was assigned last.
      path = dir + "/" + file;
      // Get the file's stats
      fs.stat(path, function (err, stat) {
        console.log(stat);
        // If the file is a directory
        // (`err` is not inspected here; a falsy `stat` falls through to the else branch)
        if (stat && stat.isDirectory())
          // Dive into the directory
          dive(path, action);
        else
          // Call the action
          action(null, path);
      });
    });
  });
};

The problem is that inside the forEach loop, fs.stat is called for every file using the variable path. By the time the callback runs, path already has another value, so it dives into the wrong directories or calls the action for the wrong files.

This problem could probably be solved easily by using fs.statSync, but that is not the solution I would prefer, since it blocks the process.

// Declaring `path` with `var` makes it local to the enclosing function, so each
// forEach callback invocation keeps its own value.
var path = dir + "/" + file;

You forgot to make path a local variable. Now it won't be changed behind your back in the loop.

Use node-dir for this. Because you need a separate action for directories and files, I'll give you 2 simple iterators using node-dir.

Asynchronously iterate the files of a directory and its subdirectories and pass an array of file paths to a callback.

var dir = require('node-dir');

// Receives the result of dir.files: either an error or the array of file paths.
function handleFiles(err, files) {
  if (err) {
    throw err;
  }
  console.log(files);
  // Hand every collected path to the per-file action.
  files.forEach(function (filepath) {
    actionOnFile(null, filepath);
  });
}

dir.files(__dirname, handleFiles);

Asynchronously iterate the subdirectories of a directory and its subdirectories and pass an array of directory paths to a callback.

var dir = require('node-dir');

// Receives the result of dir.subdirs: either an error or the array of directory paths.
function handleSubdirs(err, subdirs) {
  if (err) {
    throw err;
  }
  console.log(subdirs);
  // Hand every collected directory path to the per-directory action.
  subdirs.forEach(function (filepath) {
    actionOnDir(null, filepath);
  });
}

dir.subdirs(__dirname, handleSubdirs);

Another suitable library is filehound. It supports file filtering (if required), callbacks and promises.

For example:

const Filehound = require('filehound');

/** Logs a one-line notice naming the file that is being processed. */
function action(file) {
  const notice = `process ${file}`;
  console.log(notice);
}

// Run a search on a freshly created Filehound instance and apply `action`
// to every entry of the result; a failure is only reported on stderr.
Filehound.create().find((err, files) => {
    if (!err) {
        files.forEach(action);
        return;
    }

    console.error(`error: ${err}`);
});

The library is well documented and provides numerous examples of common use cases. https://github.com/nspragg/filehound

Disclaimer: I'm the author.

I'm not sure whether I should really post this as an answer, but for your convenience and that of other users, here is a rewritten version of the OP's code which might prove useful. It provides:

  • Better error management support
  • A global completion callback which is called when the exploration is complete

The code:

/**
 * Recursively walks a directory tree and reports every entry that is not a directory.
 *
 * @param {string} dir - Path to the directory to explore.
 * @param {function(string, Object)} action - Called as action(file, stat) for each
 *   non-directory entry, until an error occurs. `file` is the path to the entry and
 *   `stat` is its stat (retrieved by fs.stat). If it throws, the walk stops and the
 *   thrown error is passed to `done`.
 * @param {function(Error=)} [done] - Called one time when the walk is over. `err` is
 *   undefined if everything went fine; otherwise it is the error that stopped the
 *   walk. Optional: when omitted, completion is simply not reported.
 */
var walk = function(dir, action, done) {

    // Without a completion callback, calling `done` would raise a TypeError from
    // inside an fs callback once the walk ends; fall back to a no-op instead.
    const finish = typeof done === "function" ? done : function() {};

    // set as soon as an error occurred (from then on we stop walking the tree)
    let dead = false;

    // number of async operations that have been started but not completed yet
    let pending = 0;

    // reports the first error only; later failures are ignored
    const fail = function(err) {
        if (dead) {
            return;
        }
        dead = true;
        finish(err);
    };

    // marks one async operation as complete and reports success once none is left
    const complete = function() {
        pending--;
        if (!dead && pending === 0) {
            finish();
        }
    };

    // runs the user's action, turning anything it throws into a failure of the walk
    const performAction = function(file, stat) {
        if (dead) {
            return;
        }
        try {
            action(file, stat);
        }
        catch (error) {
            fail(error);
        }
    };

    // recursively explores one directory in the context defined by the variables above
    const dive = function(current) {
        pending++; // async operation starting after this line
        fs.readdir(current, function(err, list) {
            if (dead) {
                return; // already failed: ignore late results
            }
            if (err) {
                fail(err);
                return;
            }
            list.forEach(function(file) {
                if (dead) {
                    return;
                }
                const path = current + "/" + file;
                pending++; // async operation starting after this line
                fs.stat(path, function(err, stat) {
                    if (dead) {
                        return; // already failed: ignore late results
                    }
                    if (err) {
                        fail(err);
                        return;
                    }
                    if (stat && stat.isDirectory()) {
                        // the nested dive bumps `pending` before this stat is
                        // marked complete, so the counter cannot hit 0 too early
                        dive(path);
                    }
                    else {
                        performAction(path, stat);
                    }
                    complete(); // this stat is done
                });
            });
            complete(); // this readdir is done
        });
    };

    // start exploration
    dive(dir);
};

Don't reinvent the wheel - use and contribute to open source instead. Try one of the following:

There is an NPM module for this:

npm dree

Example:

const dree = require('dree');
// Scan settings handed to both dree calls below.
const options = {
    // To stop after 5 directory levels
    depth: 5,
    // To exclude some paths with a regexp
    exclude: /dir_to_exclude/,
    // To include only some extensions
    extensions: ['txt', 'jpg'],
};

// Passed to dree as the file callback: forwards the file's path to `action`.
const fileCallback = (file) => {
    action(file.path);
};

// Doing it synchronously
let tree = dree.scan('./dir', options, fileCallback);

// Doing it asynchronously (returns promise); the awaited result replaces the one above
tree = await dree.scanAsync('./dir', options, fileCallback);

// Here tree contains an object representing the whole directory tree (filtered with options)
/**
 * Sequential asynchronous loop over `list`: each call removes the first entry,
 * starts the asynchronous step, and passes `loop` itself as the callback, so the
 * next entry is taken when `functionWithCallback` calls back. Once `list` is
 * empty, `whatever()` is called instead.
 */
function loop( ) {
    // Test the length rather than the shifted value, so that falsy entries
    // (0, "", null, undefined, false) do not end the loop early.
    if ( list.length === 0 ) {
        // after the loop has ended
        whatever( );
        return;
    }
    var item = list.shift( );
    // content of the loop
    functionWithCallback( loop );
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM