简体   繁体   中英

memory leak in node.js app on AWS

I have some code in node, basically it is making api calls to external service and dump the returned data into a database. But it must have some serious memory leak since the node server will run out of memory in the middle. The AWS instance I am using is 2CPU, 4GB RAM. I spent a lot of time to figure out where is the leak with no luck yet. Below is the code, any hint will be helpful.

function refreshSitesBy5Min(rawData, callback){

var sites = JSON.parse(rawData).data;

if (typeof sites !== 'undefined' && sites.length > 0){
        log.info('refreshing sites 5min');
        sites.forEach(function(elem, index, array){

        db.site_5min.find({siteid: elem.id, ts : moment(elem.ts).format('YYYY-MM-DDThh:mm:ss')}, function(err, found){

            if (typeof found === 'undefined' || found == null || found.length == 0){
                db.site_5min.save({
                    siteid : elem.id, 
                    gran : '5min', 
                    ts : moment(elem.ts).format('YYYY-MM-DDThh:mm:ss'), 
                    wh_sum : elem.Wh_sum
                }, function(err, inserted){
                    if (err){
                        log.error(err);
                    }
                });     
            }
            else{
                db.site_5min.save({
                    id: found.id, 
                    siteid : elem.id, 
                    gran : '5min', 
                    ts : moment(elem.ts).format('YYYY-MM-DDThh:mm:ss'), 
                    wh_sum : elem.Wh_sum
                }, function(err, updated){
                    if (err){
                        log.error(err);
                    }
                })
            }
        })

    })
}
else{
    log.warn('no sites data');
}

callback();

}

and this is the code to call previous method:

function refreshSiteByGran(globalToken, gran, frequency){

log.info('refreshing site for ' + gran + ' table');

// db.site.find({}, function(err, sites){
db.run("select * from site", function(err, sites){
    if (err){
        log.error(err);
    }
    if (sites){

        function handler(i){
            if (i < sites.length){
                var thePath = '/v3/sites/' + sites[i].siteid + '/data?fields=Wh_sum&tz=US/Pacific&gran=' + gran;
                var end = moment().subtract(1, 'days').format('YYYY-MM-DDThh:mm:ss');
                var start;
                if (gran === '5min' || gran === 'hourly'){
                    start = moment(end).subtract(frequency, 'days').format('YYYY-MM-DDThh:mm:ss');
                }
                else if (gran === 'daily'){
                    start = moment(end).subtract(frequency, 'days').format('YYYY-MM-DDThh:mm:ss');
                }
                else if (gran === 'monthly'){
                    start = moment(end).subtract(frequency, 'months').format('YYYY-MM-DDThh:mm:ss');
                }
                thePath = thePath + '&start=' + start + '&end=' + end;
                log.warn('thePath: ' + thePath);

                var options = locusUtil.setOptions(thePath, globalToken.token.access_token);
                request(options, function(err, result, body){
                    if (err){
                        log.error(err + ' path: ' + thePath);
                    }
                    if (body && JSON.parse(body).statusCode == 401){
                        getLocusToken(function(){
                            setTimeout(function(){
                                handler(i);
                            }, 2000);
                        })
                    }
                    else if (body && JSON.parse(body).statusCode == 200){
                        var data = JSON.parse(body).data;
                        // log.info('any data? ' + JSON.stringify(body, null, 4));
                        if (typeof data !== 'undefined' && data.length > 0){
                            if (gran === '5min'){
                                refreshSitesBy5Min(body, function(){
                                    log.info('inserted: ' + data[0].id);
                                    setTimeout(function(){
                                        handler(i+1);
                                    }, 2000);   
                                })
                            }
                            if (gran === 'hourly'){
                                refreshSitesByHourly(body, function(){
                                    log.info('inserted: ' + data[0].id);
                                    setTimeout(function(){
                                        handler(i+1);
                                    }, 2000);   
                                })
                            }
                            if (gran === 'daily'){
                                refreshSitesByDaily(body, function(){
                                    log.info('inserted: ' + data[0].id);
                                    setTimeout(function(){
                                        handler(i+1);
                                    }, 2000);   
                                })
                            }
                            if (gran === 'monthly'){
                                refreshSitesByMonthly(body, function(){
                                    log.info('inserted: ' + data[0].id);
                                    setTimeout(function(){
                                        handler(i+1);
                                    }, 2000);       
                                })
                            }
                        }
                        else{
                            setTimeout(function(){
                                handler(i+1);
                            }, 2000);
                        }

                    }
                    // re-try for concurrency error
                    else if (body && JSON.parse(body).statusCode == 429){
                        log.warn('error body ' + JSON.stringify(body));
                        setTimeout(function(){
                            handler(i);
                        }, 2000);
                    }
                    // if any other error, just skip
                    else {
                        setTimeout(function(){
                            handler(i+1);
                        }, 2000);
                    }
                })
            }
            else{
                return;
            }
        }
        handler(0);
    }
});

}

I believe the problem is inside this two blocks, I used memwatch to monitor v8 garbage collection, I see usage_trend is increasing fast, so it must have leaks.

This is very simple to solve...

First, get rid of the forEach loop, located here... sites.forEach(function(elem, index, array){

Instead, create a recursive function that simply passes an index to the following iteration. What this does is create a loop that executes correctly in accordance to the given CPU and memory allotted. No need for process.nextTick() or any of that fancy jazz.

Asynchronous loops are not technically the answer, as they overload systems rather quickly with thousands of queues. Instead, iterate through each record, then only proceed to the next when the current process is finished.

Also, delete the current array index before proceeding to the next. Eventually, the end of the loop is reached when the index returns "undefined". That is when the callback to the main function refreshSitesBy5Min is summoned.

function refreshSitesBy5Min(rawData, callback) { var sites = JSON.parse(rawData).data getSite(0) function getSite(index) { // we have reached the end if(!sites[index])
return callback() runProcess(sites[index] // clear up memory after every iteration delete sites[index] // done with iteration, move on to the next getSite(++index) } }

Still not done yet...

Big JSON Object If your JSON object is massive, you will need to stream your JSON and handle tiny chunks at a time. https://github.com/uhop/stream-json

Big Database Result Set Your SQL query should utilize limit if you are returning more than 500 records results at a time, even smaller is better. So if your record set being returned is 100,000. Just grab 500 at a time in a recursive function, simply increment the index and multiply it by num_records, in this scenario: 500.

var offset = iter * 500 limit: [offset, 500]

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM