简体   繁体   中英

Forcing Sequential Code in node.js

I finally figured out how callbacks work in node.js, but I'm trying now to get my code to execute in order.

The goal is to (in order):

  1. Load the URL into cheerio
  2. Parse through each <td> in the <tbody> on the page.
  3. Once text elements are loaded into the data array, callback.
  4. Call loopThroughData on the full Data array.
  5. Loop through the data array and call the lookForPlayer function on each one, which:
  6. Runs a SELECT in my db that matches the player name passed from the text element, and if there is no match in my db, INSERT them (I have it just printing to the console for now for testing purposes).

The end goal is to go through every page (there is a separate URL for each date, so I am looping through the dates) and INSERT players that aren't in my database ONCE. The problem is that it goes through each SELECT before the INSERT queries are executed, so it's inserting them multiple times.

Here is the page I'm parsing, if it helps: http://www.basketball-reference.com/friv/dailyleaders.cgi?month=12&day=29&year=2014

Here is my code:

/**
 * Fetches `url` with `request`, loads the response body into cheerio, and
 * appends the text of every <td> inside a <tbody> to the outer `data` array.
 *
 * `data` and `rowsRemaining` are not declared in this function; they are
 * variables defined outside it.
 *
 * @param {string} url - Page to fetch.
 * @param {function(Array)} callback - Called with `data`, but only when
 *     `rowsRemaining` equals 0 once the request has completed.
 */
function loadPage (url, callback){
    request(url, function(err, response, body){
        if(!err && response.statusCode ==200){
            var $ = cheerio.load(body);
            // NOTE(review): `$` is invoked as a function below, so `$.length` is
            // that function's declared parameter count, not the number of <td>
            // cells on the page.
            rowsRemaining = $.length;
            $('td', 'tbody').each(function(){
                var text = $(this).text();
                data.push(text);
                // Decremented once per visited cell; it only ends at 0 if the
                // starting value happened to equal the number of cells.
                rowsRemaining -= 1;
                console.log('rows left: ',rowsRemaining);
            });
        }
        // The callback is skipped unless the counter landed exactly on 0.
        // On an error or non-200 response `$` was never assigned, so
        // `$.length` would throw if this branch ran in that case.
        if (rowsRemaining == 0){
            console.log('$ length: ',$.length);
            callback(data);
        }
    });
}

/**
 * Walks `data` from index 1 in steps of 26, passing each visited entry (with
 * its first apostrophe removed) to lookForPlayer, then invokes `callback`.
 *
 * @param {Array<string>} data - Flat list of cell texts.
 * @param {function()} callback - Called with no arguments as soon as the loop
 *     finishes, whether or not the lookForPlayer callbacks have run yet.
 */
function loopThroughData (data, callback){
    // `i` is assigned without var/let, so it is not scoped to an iteration; a
    // lookForPlayer callback that runs after the loop sees the final `i`.
    for(i=1;i<data.length;i+=26){
        // replace() with a string pattern removes only the first apostrophe.
        lookForPlayer(data[i].replace("'",""),function(name){
            // The INSERT is commented out for now; only the index is logged.
            /* var insertPlayer = connection.query(
                'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
            }); */
            console.log('i is currently = ',i);

        });
    }
    callback();
}

/**
 * Runs a SELECT on the `player` table for rows whose name equals `name`.
 *
 * @param {string} name - Player name to look up.
 * @param {function(string)} callback - Called with `name` only when the query
 *     returns zero rows; it is never called when a matching row exists.
 * @throws Rethrows the query error from inside the query callback.
 */
function lookForPlayer(name, callback){
    console.log('Looking for Player...');
    // NOTE(review): the SQL text is built by concatenating `name` directly
    // into the statement, so quoting in `name` affects the query.
    var selectPlayer = connection.query(
    "SELECT * FROM player WHERE name = '"+name+"'", function(err, rows, fields){
    if(err) throw err;
    // No matching row: report the name back so the caller can insert it.
    if(rows.length==0){
        callback(name);
    }
    });
}

//loop through every day since the season started
// NOTE(review): `d` is the same object as `seasonStart`, so setDate() advances
// `seasonStart` itself. The loop body only starts loadPage and moves on to the
// next date; nothing here waits for a page's callback before continuing.
for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
    console.log('d = ',d);
    // getMonth() is zero-based, hence the +1 when building the URL.
    loadPage(baseURL+(d.getMonth()+1)+'&day='+d.getDate()+'&year='+d.getFullYear(),function(data){
        console.log('Page loaded...');
        // The completion callback passed to loopThroughData is empty.
        loopThroughData(data,function(){

        });
    });
}

As you can see, I tried adding a rowsRemaining variable that is meant to make sure I've parsed the whole file before calling the callback in the loadPage function, but it never gets to that point. Note that I initialize a lot of these variables before these functions (rowsRemaining, data, etc).

It also seems to loop through every date before fully loading, parsing, and INSERTing the first page, which it should not be doing.

Here is the updated code, based on @Brant's answer:

   /**
    * Fetches `url` with `request`, loads the response body into cheerio, and
    * appends the text of every <td> inside a <tbody> to the outer `data` array.
    *
    * @param {string} url - Page to fetch.
    * @param {function(Array)} callback - Always called with `data` once the
    *     request completes, including on an error or non-200 status.
    */
   function loadPage (url, callback){
    request(url, function(err, response, body){
        if(!err && response.statusCode ==200){
            var $ = cheerio.load(body);
            console.log(url);
            $('td', 'tbody').each(function(){
                var text = $(this).text();
                // NOTE(review): `data` is declared outside this function and is
                // not cleared here, so cells from earlier pages stay in it
                // unless it is reset elsewhere.
                data.push(text);
            });
        }
        callback(data);
    });
}

/**
 * Walks `data` from index 1 in steps of 26, passing each visited entry (with
 * its first apostrophe removed) to lookForPlayer and issuing an INSERT for
 * every name lookForPlayer reports back, then invokes `callback`.
 *
 * @param {Array<string>} data - Flat list of cell texts.
 * @param {function(Array)} callback - Called with `data` as soon as the loop
 *     finishes, whether or not the lookups and INSERTs have completed.
 */
function loopThroughData (data, callback){
    // `i` is assigned without var/let, so it is not scoped to an iteration.
    for(i=1;i<data.length;i+=26){
        // replace() with a string pattern removes only the first apostrophe.
        lookForPlayer(data[i].replace("'",""),function(name){
            // NOTE(review): `data[i+1]` is read when this callback runs, using
            // whatever value the shared `i` holds at that moment. The SQL text
            // is also built by direct string concatenation.
            var insertPlayer = connection.query(
                'INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = "'+data[i+1]+'"),1,"'+name+'");',function(err,result,fields){
            });

        });
    }
    callback(data);
}

/**
 * Runs a SELECT on the `player` table for rows whose name equals `name`.
 *
 * @param {string} name - Player name to look up.
 * @param {function(string)} callback - Called with `name` only when the query
 *     returns zero rows; it is never called when a matching row exists.
 * @throws Rethrows the query error from inside the query callback.
 */
function lookForPlayer(name, callback){
    // NOTE(review): the SQL text is built by concatenating `name` directly
    // into the statement, so quoting in `name` affects the query.
    var selectPlayer = connection.query(
    "SELECT * FROM player WHERE name = '"+name+"'", function(err, rows, fields){
    if(err) throw err;
    // No matching row: log it and report the name back to the caller.
    if(rows.length==0){
        console.log(name,' was not found in DB!');
        callback(name);
    }
    });
}

//loop through every day since the season started
// NOTE(review): the same `d` object (which is also `seasonStart`) is pushed on
// every iteration and then mutated by setDate(), so every array element refers
// to one Date whose value is the last date the loop reached.
for (d = seasonStart; d <= Date.now(); d.setDate(d.getDate() + 1)){
    validDatesArr.push(d);
}

// Processes the collected dates one at a time: each date's page is loaded and
// passed to loopThroughData before async.eachSeries moves to the next date.
async.eachSeries(validDatesArr,
    // The parameter shadows the outer array name; here it is a single item.
    function(validDatesArr, callback){
    // NOTE(review): the left operand is already a string, so
    // `getMonth()+1` is appended as text (month index 11 becomes "111")
    // rather than added numerically. The '/month=' prefix also differs from
    // the earlier version, which appended the month straight onto baseURL.
    loadPage(baseURL+'/month='+validDatesArr.getMonth()+1+'&day='+validDatesArr.getDate()+'&year='+validDatesArr.getFullYear(),function(data){
        loopThroughData(data, function(){
            // Signals async.eachSeries that this date is done.
            callback();
        });
    });
    }, function(err){
        // Final callback: logs only when no error was reported.
        if(!err){
            console.log('We processed each date requests one by one');
        }
    }
);

So now it's loading the pages one by one, but it isn't executing the INSERT function in the loopThroughData function on that data. I would think I would just add another function to the async list, but this particular one is calling a function as opposed to using an anonymous one.

Modify your for loop to be as follows:

//loop through every day since the season started
var validDatesArr = [];

// Iterate over a copy so that `seasonStart` itself is not advanced, and push a
// fresh Date for each day. Pushing `d` directly would store the same object on
// every iteration, leaving every array element equal to the final date.
for (var d = new Date(seasonStart); d <= Date.now(); d.setDate(d.getDate() + 1)){
    validDatesArr.push(new Date(d));
}

// Handle the dates strictly one after another: a date's page is loaded and
// passed to loopThroughData before the next date is started.
async.eachSeries(validDatesArr, function(d, callback) {
    // getMonth() is zero-based, so shift it to a 1-12 month number.
    var month = d.getMonth() + 1;
    var pageUrl = baseURL + month + '&day=' + d.getDate() + '&year=' + d.getFullYear();

    loadPage(pageUrl, function(data) {
        console.log('Page loaded...');
        loopThroughData(data, function() {
            // Tell async.eachSeries this date is finished.
            callback();
        });
    });
}, function(err) {
    // Final callback: nothing is logged when an error was reported.
    if (err) {
        return;
    }
    console.log('We processed each date request one by one')
});

And require async which can be found here: https://github.com/caolan/async

npm install async

You can nest the async functions to control the execution flow as in sequential programming, but be careful of the "pyramid of doom". The other solution is to use the sync versions of the async functions you used (if they exist). You are not forced to write async functions if you do NOT need them; Node.js uses a lot of async functions because it is a non-blocking platform that is very powerful for web development. So do NOT use the async style and callbacks in your functions!

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM