简体   繁体   English

在Node.js中将Async与Waterfall和Recursion结合使用

[英]Using Async With waterfall and Recursion in nodejs

I've created a script to migrate data from DynamoDB to a MySQL DB. 我创建了一个脚本,用于将数据从DynamoDB迁移到MySQL数据库。 At first I was not using Async, but I started getting bottlenecks on the SQL side, so I decided to "throttle" the Dynamo part using the async lib. 起初我没有使用Async,但是我开始在SQL方面遇到瓶颈,因此我决定使用async库来“限制”Dynamo部分。 The problem: I have a recursion in the middle of the path; as long as Dynamo has data I have to continue the process (ultra simple ETL), but I don't know how to perform the recursion inside the waterfall. 问题:我在流程的中间有一个递归;只要Dynamo还有数据,我就必须继续处理(超简单ETL),但是我不知道如何在waterfall内部执行递归。 My code: 我的代码:

/**
 * Entry point of the migration: runs the four steps in sequence through
 * async.waterfall, in this order:
 * getMaxTimestamp -> scanDynamoDB -> printout -> saveToMySQL.
 *
 * Each step hands its result to the next one. The final callback receives
 * either the error that stopped the chain or the result of the last step.
 */
function main() {
    const steps = [getMaxTimestamp, scanDynamoDB, printout, saveToMySQL];

    async.waterfall(steps, function (err, result) {
        if (err) {
            // A failed run has no meaningful result, so report the error and
            // stop instead of also printing "undefined".
            console.error(err);
            return;
        }
        console.log(result);
    });
}

/**
 * Waterfall step: asks MySQL for the newest `created_at` stored in Tracking.
 *
 * The query is timed under the "max query" console timer.
 *
 * @param {function(*, *)} callback - Receives the error and the rows exactly
 *   as the connection reports them; the value is exposed as `start_date`.
 */
function getMaxTimestamp(callback) {
    const timerLabel = "max query";
    const sql = "SELECT MAX(created_at) as start_date from Tracking;";

    console.time(timerLabel);
    connection.query(sql, function onMaxTimestamp(queryErr, rows) {
        console.timeEnd(timerLabel);
        callback(queryErr, rows);
    });
}

/**
 * Waterfall step: runs a single DynamoDB scan with the shared `query` object.
 *
 * When the previous step produced a truthy `start_date`, it is written into
 * the `:v_ca` expression attribute value before scanning. Only one page is
 * read; the scan response is forwarded to `callback` untouched.
 *
 * @param {Array<Object>} data - Rows from the previous step; only
 *   `data[0].start_date` is read.
 * @param {function(*, *)} callback - Receives the scan error and response.
 */
function scanDynamoDB(data, callback) {
    const startDate = data[0].start_date;
    if (startDate) {
        query.ExpressionAttributeValues[':v_ca'].N = startDate;
    }

    const timerLabel = "dynamo read";
    console.time(timerLabel);
    dynamoDB.scan(query, function onScanPage(scanErr, page) {
        console.timeEnd(timerLabel);
        callback(scanErr, page);
        // Disabled sketch of the page-by-page recursion (left as written, not wired in):
        // if (!err) {
        //     if (data != undefined && data.Count > 0) {
        //         printout(data.Items) // Print out the subset of results.
        //         if (data.LastEvaluatedKey) { // Result is incomplete; there is more to come.
        //             query.ExclusiveStartKey = data.LastEvaluatedKey;
        //             scanDynamoDB(query);
        //         }
        //     } else {
        //         console.log('No fresh data found on Dynamo')
        // } else console.dir(err);
    });
}

/**
 * Rebuilds the bulk INSERT statement for the Tracking table.
 *
 * The column list is the comma-separated content of the shared `headers`
 * array (column-name strings). The statement is stored in the shared
 * `insertSql` variable and copied into `previousInsertSql`.
 */
function assembleSql() {
    const columnList = headers.join(",");

    insertSql = `insert into Tracking (${columnList}) values ?;`;
    previousInsertSql = insertSql;
}

/**
 * Waterfall step: bulk-inserts the prepared rows into the Tracking table and
 * reports the running total of inserted rows.
 *
 * @param {Array<Array>} items - Rows to insert, one inner array per row, in
 *   the column order of the statement built by assembleSql().
 * @param {function(*, ?number)} callback - Called with `(err, null)` when the
 *   insert fails, otherwise with `(null, totalInserts)`.
 */
function saveToMySQL(items, callback) {
    // Nothing to insert (e.g. the scan found no fresh data). Sending
    // "... values ?" with an empty row list would presumably yield an invalid
    // statement (depends on the driver's placeholder expansion), so skip the
    // round trip and report the unchanged total.
    if (!items || items.length === 0) {
        callback(null, totalInserts);
        return;
    }

    assembleSql();
    console.time("insert sql");
    // `[items]` binds the whole row list to the single `?` placeholder.
    connection.query(insertSql, [items], function (err, result) {
        console.timeEnd("insert sql");
        if (err) {
            callback(err, null);
            return;
        }

        totalInserts += result.affectedRows;
        callback(null, totalInserts);
    });
}

/**
 * Flattens one typed DynamoDB attribute (`{N: ...}`, `{S: ...}`, ...) into
 * the scalar that goes into the SQL row.
 *
 * Sets are joined with commas, binary is base64-encoded, maps and lists are
 * JSON-encoded, booleans become "true"/"false". Anything else yields "".
 */
function attributeToColumnValue(attribute) {
    if (attribute == null) return "";
    if (attribute.N !== undefined) return attribute.N;
    if (attribute.S !== undefined) return attribute.S;
    if (attribute.SS !== undefined) return attribute.SS.toString();
    if (attribute.NS !== undefined) return attribute.NS.toString();
    if (attribute.B !== undefined) return attribute.B.toString('base64');
    if (attribute.M !== undefined) return JSON.stringify(attribute.M);
    if (attribute.L !== undefined) return JSON.stringify(attribute.L);
    if (attribute.BOOL !== undefined) return attribute.BOOL.toString();
    return "";
}

/**
 * Waterfall step: turns DynamoDB items into rows of plain values, one row per
 * item and one cell per entry of the shared `headers` array.
 *
 * If `headers` is still empty it is filled with every attribute name found in
 * this batch; later batches reuse that column list. Attributes an item does
 * not have become "".
 *
 * @param {Array<Object>|{Items: Array<Object>}} items - The items themselves,
 *   or a scan response carrying them in `Items` (this is what the waterfall
 *   passes on from the scan step).
 * @param {function(null, Array<Array>)} callback - Receives the rows.
 */
function printout(items, callback) {
    // Accept both the bare item list and the whole scan response.
    const records = Array.isArray(items) ? items : (items && items.Items) || [];

    if (headers.length === 0) {
        // Prototype-less map so attribute names such as "__proto__" are
        // treated as ordinary keys.
        const headersMap = Object.create(null);
        records.forEach(function (item) {
            Object.keys(item).forEach(function (key) {
                headersMap[key] = true;
            });
        });
        Object.keys(headersMap).forEach(function (key) {
            headers.push(key);
        });
    }

    const out = records.map(function (item) {
        return headers.map(function (header) {
            return Object.prototype.hasOwnProperty.call(item, header)
                ? attributeToColumnValue(item[header])
                : "";
        });
    });

    callback(null, out);
}
// Start the migration as soon as the script is loaded.
main();

The commented-out part is where the recursion happens, but I don't know where to place it inside my flow! 注释掉的部分是递归发生的地方,但是我不知道将其放在我的流程中的什么位置!

Any help would be appreciated ! 任何帮助,将不胜感激 !

Just don't call the callback function inside scanDynamoDB while data is still being fetched. 只是在仍在获取数据时,不要在scanDynamoDB内部调用回调函数。 You can implement an additional function and call it recursively as long as no error occurs, like below: 您可以实现一个附加函数,并在没有出现错误的情况下递归调用它,如下所示:

/**
 * Waterfall step: scans DynamoDB page by page with the shared `query` object
 * and calls `callback` once, with the items of all pages.
 *
 * A page that carries `LastEvaluatedKey` is incomplete: the key is stored in
 * `query.ExclusiveStartKey` and the scan is repeated. Paging stops on the
 * first page without that key (a page may be empty and still not be the last
 * one) or on the first error.
 *
 * @param {Array<Object>} data - Rows from the previous step; a truthy
 *   `data[0].start_date` is written into the `:v_ca` expression value.
 * @param {function(*, Array<Object>=)} callback - Called with `(err)` on
 *   failure, otherwise with `(null, items)`.
 */
function scanDynamoDB(data, callback) {
    if (data[0].start_date)
        query.ExpressionAttributeValues[':v_ca'].N = data[0].start_date;

    console.time("dynamo read");

    const result = []; // items accumulated across all pages

    // Ends the timer and clears the paging cursor so that a later scan with
    // the same `query` object starts from the beginning again.
    function finish(err) {
        console.timeEnd("dynamo read");
        delete query.ExclusiveStartKey;

        if (err)
            return callback(err);

        callback(null, result);
    }

    function readNext(err, page) {
        if (err)
            return finish(err);

        if (page && page.Items) {
            for (const item of page.Items)
                result.push(item);
        }

        if (!page || !page.LastEvaluatedKey)
            return finish(null);

        // Result is incomplete: continue right after the last evaluated key.
        query.ExclusiveStartKey = page.LastEvaluatedKey;
        dynamoDB.scan(query, readNext);
    }

    dynamoDB.scan(query, readNext);
}

Actually I was able to figure it out by myself. 其实我自己就能弄清楚。

// Keep reading pages for as long as `canInsert` is truthy. Each iteration
// calls scanDynamoDB and hands it the iteration callback.
// NOTE(review): the synchronous test function matches the async v2 signature;
// confirm the installed async version.
async.whilst(
    function () {
        return canInsert;
    },
    function (callback) {
        scanDynamoDB(query, callback);
    },
    function (err, res) {
        // Report a failed run instead of dropping the error silently.
        if (err) console.error(err);
    }
);
/**
 * Reads ONE page of the scan with the shared `query` object and records in
 * the shared `canInsert` variable whether another page remains.
 *
 * `canInsert` receives the page's `LastEvaluatedKey` (truthy while there is
 * more to read, undefined on the last page). The same key is stored in
 * `query.ExclusiveStartKey`, so the next call continues where this one
 * stopped; on the last page the cursor is removed again.
 *
 * @param {*} data - Unused; the scan always runs with the shared `query`.
 * @param {function(*, Object=)} callback - Called exactly once per call: with
 *   `(err)` when the scan fails, otherwise with `(null, page)`.
 */
function scanDynamoDB(data, callback) {
    console.time("dynamo read");

    dynamoDB.scan(query, function (err, page) {
        console.timeEnd("dynamo read");
        if (err) {
            console.dir(err);
            callback(err);
            return;
        }

        // Only the key decides whether more data follows: a page can be
        // empty and still not be the last one.
        const lastKey = page ? page.LastEvaluatedKey : undefined;
        canInsert = lastKey;
        if (lastKey) {
            query.ExclusiveStartKey = lastKey; // Result is incomplete; there is more to come.
        } else {
            delete query.ExclusiveStartKey;
        }

        callback(null, page);
    });
}

I could have done it just with a while(canInsert). 我只用while(canInsert)就可以做到。 Anyway, I avoided recursion and memory usage is way, way lower. 无论如何,我避免了递归,并且内存使用率要低得多。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM