
How to minify/compress thousands of JS files - including some large ones - at the same time or sequentially without crashing the console?

Context

For a demo I'm currently refactoring, I have a src folder that contains 196 MB of data. About 142 MB of that consists of two binary files.

About 2,000 of the remaining 2,137 files (about 46 MB in total) are JavaScript files, most of which belong to the official, complete distributions of two large frameworks. The largest JavaScript file is about 23 MB; it is unminified code originally written in C++ and compiled to asm.js with Emscripten.

I wanted to write a Node.js script that copies all of my files from the src path to the dist path and minifies every JS or CSS file it encounters along the way. Unfortunately, the number and/or size of JS files involved seems to break my script.


Let's go through the steps I took...

Step 1

I started with writing a small build script that copied all data from my src folder to my dist folder. I was surprised to learn that this process finishes in a matter of seconds.

Below is my code for this script. Note that you'll need Node 8 to run it.

const util = require('util');
const fs = require('fs');
const path = require('path');

const mkdir = util.promisify(require('mkdirp'));
const rmdir = util.promisify(require('rimraf'));
const ncp = util.promisify(require('ncp').ncp);
const readdir = util.promisify(fs.readdir);
const readFile = util.promisify(fs.readFile);
const writeFile = util.promisify(fs.writeFile);
const stat = util.promisify(fs.stat);

const moveFrom = path.join(__dirname,"../src");
const moveTo = path.join(__dirname,"../dist");

var copyFile = function(source, target) {
    return new Promise(function(resolve,reject){
        const rd = fs.createReadStream(source);
        rd.on('error', function(error){
            reject(error);
        });
        const wr = fs.createWriteStream(target);
        wr.on('error', function(error){
            reject(error);
        });
        wr.on('close', function(){
            resolve();
        });
        rd.pipe(wr);
    });
};

var copy = function(source, target) {
    stat(source)
    .then(function(stat){
        if(stat.isFile()) {
            console.log("Copying file %s", source);
            switch (path.extname(target)) {
                default:
                    return copyFile(source, target);
            }
        } else if( stat.isDirectory() ) {
            return build(source, target);
        }
    }).catch(function(error){
        console.error(error);
    });
};

var build = function(source, target) {
    readdir(source)
    .then(function(list) {
        return rmdir(target).then(function(){
            return list;
        });
    })
    .then(function(list) {
        return mkdir(target).then(function(){
            return list;
        });
    }).then(function(list) {
        list.forEach(function(item, index) {
            copy(path.join(source, item), path.join(target, item));
        });
    }).catch(function(error){
        console.error(error);
    })
};

build(moveFrom, moveTo);

Step 2

Next, I added CSS minification, so that every CSS file encountered along the way is minified rather than just copied.

For that, I made the following modifications to my code.

First, I added this function:

var uglifyCSS = function(source, target) {
    readFile(source, "utf8")
    .then(function(content){
        return writeFile(target, require('ycssmin').cssmin(content), "utf8");
    }).catch(function(error){
        console.error(error);
    });
}

Then, I modified my copy function, like this:

var copy = function(source, target) {
    stat(source)
    .then(function(stat){
        if(stat.isFile()) {
            console.log("Copying file %s", source);
            switch (path.extname(target)) {
            case ".css":
                return uglifyCSS(source, target);
            default:
                return copyFile(source, target);
            }
        } else if( stat.isDirectory() ) {
            return build(source, target);
        }
    }).catch(function(error){
        console.error(error);
    });
};

So far, so good. Everything still runs smoothly at this stage.

Step 3

Then, I did the same to minify my JS.

So again, I added a new function:

var uglifyJS = function(source, target) {
    readFile(source, "utf8")
    .then(function(content){
        return writeFile(target, require('uglify-js').minify(content).code, "utf8");
    }).catch(function(error){
        console.error(error);
    });
}

Then, I modified my copy function again:

var copy = function(source, target) {
    stat(source)
    .then(function(stat){
        if(stat.isFile()) {
            console.log("Copying file %s", source);
            switch (path.extname(target)) {
            case ".css":
                return uglifyCSS(source, target);
            case ".js":
                return uglifyJS(source, target);
            default:
                return copyFile(source, target);
            }
        } else if( stat.isDirectory() ) {
            return build(source, target);
        }
    }).catch(function(error){
        console.error(error);
    });
};

The problem

This is where things go wrong. As the script encounters more and more JS files, it keeps slowing down until it seems to stop completely.

It appears that too many parallel operations get started and keep consuming more and more memory until none is left and the process just dies silently. I tried other minifiers besides UglifyJS and ran into the same issue with every one of them, so the problem doesn't appear to be specific to UglifyJS.
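One way to confirm that memory really is the culprit (a small diagnostic sketch; it is not part of the script above) is to log the process's memory usage at a fixed interval while the build runs:

// Periodically log heap and RSS usage so a runaway memory pattern becomes visible.
// unref() keeps this timer from holding the process open once all work is done.
setInterval(function () {
    const usage = process.memoryUsage();
    console.log("heapUsed: %d MB, rss: %d MB",
        Math.round(usage.heapUsed / 1048576),
        Math.round(usage.rss / 1048576));
}, 5000).unref();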

Any ideas how to fix this issue?

This is the complete code:

const util = require('util');
const fs = require('fs');
const path = require('path');

const mkdir = util.promisify(require('mkdirp'));
const rmdir = util.promisify(require('rimraf'));
const ncp = util.promisify(require('ncp').ncp);
const readdir = util.promisify(fs.readdir);
const readFile = util.promisify(fs.readFile);
const writeFile = util.promisify(fs.writeFile);
const stat = util.promisify(fs.stat);

const moveFrom = path.join(__dirname,"../src");
const moveTo = path.join(__dirname,"../dist");

var copyFile = function(source, target) {
    return new Promise(function(resolve,reject){
        const rd = fs.createReadStream(source);
        rd.on('error', function(error){
            reject(error);
        });
        const wr = fs.createWriteStream(target);
        wr.on('error', function(error){
            reject(error);
        });
        wr.on('close', function(){
            resolve();
        });
        rd.pipe(wr);
    });
};

var uglifyCSS = function(source, target) {
    readFile(source, "utf8")
    .then(function(content){
        return writeFile(target, require('ycssmin').cssmin(content), "utf8");
    }).catch(function(error){
        console.error(error);
    });
}

var uglifyJS = function(source, target) {
    readFile(source, "utf8")
    .then(function(content){
        return writeFile(target, require('uglify-js').minify(content).code, "utf8");
    }).catch(function(error){
        console.error(error);
    });
}

var copy = function(source, target) {
    stat(source)
    .then(function(stat){
        if(stat.isFile()) {
            console.log("Copying file %s", source);
            switch (path.extname(target)) {
            case ".css":
                return uglifyCSS(source, target);
            case ".js":
                return uglifyJS(source, target);
            default:
                return copyFile(source, target);
            }
        } else if( stat.isDirectory() ) {
            return build(source, target);
        }
    }).catch(function(error){
        console.error(error);
    });
};

var build = function(source, target) {
    readdir(source)
    .then(function(list) {
        return rmdir(target).then(function(){
            return list;
        });
    })
    .then(function(list) {
        return mkdir(target).then(function(){
            return list;
        });
    }).then(function(list) {
        list.forEach(function(item, index) {
            copy(path.join(source, item), path.join(target, item));
        });
    }).catch(function(error){
        console.error(error);
    })
};

build(moveFrom, moveTo);

Easy fix: your whole problem is that you have no bound on your parallelization:

list.forEach(function(item, index) {
        copy(path.join(source, item), path.join(target, item));
});

You dispatch asynchronous operations synchronously: each call to copy returns immediately, and nothing waits for it to finish, so nothing stops thousands of them from running at once. You either need to run the operations sequentially or put a bound on how many run concurrently. (Note that for either approach to take effect, copy and build also have to return their promise chains; as posted they don't return the result of stat(source)... and readdir(source)..., so callers have nothing to wait on.) Start by building a list of functions, each of which starts one copy when called:

const copyOperations = list.map((item) => {
    return () => copy(path.join(source, item), path.join(target, item));
});

Then make them run in sequence:

const initialValue = Promise.resolve();
copyOperations.reduce((accumulatedPromise, nextFn) => {
    return accumulatedPromise.then(nextFn);
}, initialValue);

Now, if you want to wait for all of them to finish, you need to return a promise, so the copy section of your code will look like this:

.then(function(list) {
    const copyOperations = list.map((item) => {
        return () => copy(path.join(source, item), path.join(target, item));
    });

    const allOperations = copyOperations.reduce((accumulatedPromise, nextFn) => {
        return accumulatedPromise.then(nextFn);
    }, Promise.resolve());

    return allOperations;
})

This will of course copy just one file at a time. Should you want more operations running concurrently, you need a fancier mechanism. Try a promise pooling mechanism where you can set a concurrency threshold, such as require('os').cpus().length.

Example of bounded parallelization using an ES6 generator

Just replace the body of the then function above with this:

const PromisePool = require('es6-promise-pool');
const maxProcesses = require('os').cpus().length;

const copyOperations = list.map((item) => {
    return () => copy(path.join(source, item), path.join(target, item));
});

// yield is not allowed inside a forEach callback, so use a plain loop.
// Each copy is only started when the pool pulls the next promise.
const promiseGenerator = function* () {
    for (const operation of copyOperations) {
        yield operation();
    }
};

var pool = new PromisePool(promiseGenerator(), maxProcesses);

return pool.start()
    .then(function () {
        console.log('Complete');
    });
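If you'd rather avoid the extra dependency, the same bounded concurrency can be done by hand (a rough sketch, not part of es6-promise-pool): start a fixed number of "workers", each of which keeps pulling the next thunk from the shared list until the list is exhausted.

var runWithLimit = function (tasks, limit) {
    let index = 0;
    // Each worker runs one task at a time and picks up the next one when it finishes.
    const worker = function () {
        if (index >= tasks.length) return Promise.resolve();
        const task = tasks[index++];
        return task().then(worker);
    };
    const workers = [];
    for (let i = 0; i < Math.min(limit, tasks.length); i++) {
        workers.push(worker());
    }
    return Promise.all(workers);
};

// Usage with the copyOperations thunks built above:
// return runWithLimit(copyOperations, require('os').cpus().length);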

Oligofren's suggestion didn't seem to help. Removing the 23 MB JS file, however, did fix the issue. So it looks like the problem was not the large number of files (as I suspected) but a single file too big for Node.js to handle. I suppose playing around with Node.js's memory settings (e.g. node --max-old-space-size, which sets the V8 heap limit, rather than --stack-size) could fix that.

Anyway, while I still need a solution that works without removing the 23 MB file, leaving this one file out of the set to be processed will have to do for now. It's pretty much just a proof of concept I was working on anyway.
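For what it's worth, a possible workaround instead of removing the file by hand (a sketch based on the script above, with an arbitrary size threshold) would be to fall back to a plain copy for any JS file above a certain size, so that only the huge asm.js build escapes minification:

// Hypothetical threshold: JS files larger than this are copied as-is instead of minified.
const MINIFY_SIZE_LIMIT = 10 * 1024 * 1024; // 10 MB

var copyOrUglifyJS = function (source, target) {
    return stat(source).then(function (stats) {
        if (stats.size > MINIFY_SIZE_LIMIT) {
            console.log("Skipping minification for large file %s", source);
            return copyFile(source, target);
        }
        return uglifyJS(source, target);
    });
};

// The ".js" case in copy() would then call copyOrUglifyJS(source, target) instead of uglifyJS.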
