简体   繁体   中英

Code stalling at collection.find

I am new to JS and am stuck at this point. I am trying to clear my DBs before starting a new query, and it keeps stalling at the collection.find command. If I remove the code that clears the DBs, everything works fine.

// GET /parse handler (code as posted in the question).
// Clears three collections, loads ./download/company.csv into `collection`,
// stores the body of a fetched page in `collectionweb`, then scans each stored
// page for each company's Symbol and writes matches to `collectionhit`.
// NOTE(review): `res` is never used below, so no response is sent from this
// handler as written.
router.get('/parse', function(req, res) {
    // No callbacks are passed to these removes, so nothing below waits for
    // them to complete.
    collection.remove({})
    collectionweb.remove({})
    collectionhit.remove({})
    //Converter Class
    var Converter = require("csvtojson").core.Converter;
    var fs = require("fs");
    var csvFileName = "./download/company.csv";
    var fileStream = fs.createReadStream(csvFileName);
    //new converter instance
    var param = {};
    var csvConverter = new Converter(param);
    // Fetch the article; the insert happens later, inside the request callback.
    // `error` and `response` are not inspected.
    request({
        uri: "http://online.barrons.com/news/articles/SB50001424053111904537004580085820431503044?mod=BOL_twm_fs",
    }, function(error, response, body) {
        collectionweb.insert({
            "website": body
        });
    });
    //end_parsed will be emitted once parsing finished
    csvConverter.on("end_parsed", function(jsonObj) {
        //Insert into DB
        collection.insert(jsonObj);
    });
    fileStream.pipe(csvConverter);
    // This find is issued immediately after the statements above; it does not
    // wait for the request callback or for end_parsed, so neither insert is
    // known to have happened when it runs.
    collectionweb.find({}, function(e, docs1) {
        // `e` is not checked.
        for (var j in docs1) {
            var body = docs1[j]
            // `webs` is a function-scoped var: the inner find callbacks below
            // all read whatever value it holds when they eventually run.
            var webs = body.website
            console.log(1)
            collection.find({}, function(e, docs) {
                for (var i in docs) {
                    console.log(2)
                    var companies = docs[i]
                    // Reset to 0 for every company document.
                    var Score = 0;
                    // `$words` is assigned without a declaration (implicit global).
                    // search() returns the index of the first match or -1, so the
                    // `> 0` test below also rejects a match at index 0.
                    $words = webs.search(companies.Symbol);
                    console.log(3)
                    if ($words > 0) {
                        // NOTE(review): the next line does not parse as written (it
                        // looks like two statements run together), and `StockScore`
                        // is not declared anywhere in this snippet.
                        StockScore++console.log(Score)
                        // insert and update are both issued without callbacks.
                        collectionhit.insert(companies)
                        collectionhit.update({
                            "Name": companies.Name
                        }, {
                            '$set': {
                                "score": Score
                            }
                        })
                    } else {};
                };
            });
        };
    });
});

There are a handful of problems, but they share one common denominator: you don't yet understand that Node.js is asynchronous. Just google "node.js asynchronous" and you'll get a handful of resources, or simply look it up here on SO (e.g. "How do I get started with Node.js?").

The gist of it is to wait for a callback or an event before moving on, for example:

// Illustrative pattern only: EventedIOThing, someAction and
// anotherDependentAction are placeholders that are not defined in this snippet.
// Each step runs inside the callback of the step it depends on.
var eiot = new EventedIOThing('paaarammm');

// use once, unless you for sure need to listen for the event multiple times
eiot.once('open',function onEIOTOpen() {
    console.log('opened the thing.');
}).once('error',function onEIOTError(err) {
    // `err` is received here but is not included in the warning.
    console.warn('there were problemzzz');
}).once('end',function onEIOTEnd() {
    // successfully finished evented IO thing...
    // `this.dep` relies on how the emitter invokes this handler — presumably
    // with the emitter as `this`, as Node's EventEmitter does; confirm for the
    // real object.
    someAction(this.dep,'anotherparam',function callMeWhenActionIsDone(err,result) {
        if ( err ) {
            console.warn('someAction had a problem!',err);
            return; // exit early if we didn't get an optimal result
        }
        // Only reached when someAction reported no error; its result feeds the
        // next step.
        anotherDependentAction(result,function callMeWhenSecondActionIsDone(err,result) {
            if ( err ) { // this 'err' is local to this function's scope
                console.warn('anotherDependentAction had a problem!',err);
                return; // exit early again
            }
            console.log('All done... what do you want to do next?');
        });
    });
});

The above code is pretty self explanatory given the variable/function names and comments, but pay close attention to the way methods are invoked, and most notably when they are invoked. Things don't happen in succession, instead code is on "stand-by" until dependent/proper things happen with successful results, and only then does the program flow continue.

The downside of the above coding style is that you'll eventually end up many nested functions deep. This is where libs like async come into play. It allows for a shallow program flow: you specify an array of functions, and async handles internally when each callback should be invoked; you just have to worry about the sequence.

Now, taking the code you currently have, applying what we learned from the first example, and one-upping it with the async module, it may be rewritten as follows:

var async = require('async'), // https://github.com/caolan/async
    fs = require('fs'),
    Converter = require('csvtojson').core.Converter;

/**
 * GET /parse — rebuilds the three collections one step at a time.
 *
 * The steps are handed to async.series, so each one starts only after the
 * previous step has called its callback:
 *   1-3. empty `collection`, `collectionweb` and `collectionhit`
 *   4.   parse ./download/company.csv and insert the rows into `collection`
 *   5.   fetch the Barron's article and store its body in `collectionweb`
 *   6.   scan every stored page for every company Symbol and record hits
 *
 * The response is ended with no body once the series finishes; an error from
 * any step is only logged.
 *
 * @param {object} req - incoming request (unused)
 * @param {object} res - response; ended in asyncComplete
 */
router.get('/parse',function cbGetParse(req, res) {
    async.series([
        // async.series passes its callback as the only argument to each bound
        // remove. NOTE(review): confirm the DB driver in use accepts a callback
        // as the first argument to remove().
        collection.remove.bind(collection),
        collectionweb.remove.bind(collectionweb),
        collectionhit.remove.bind(collectionhit),
        function convertCsv(callback) {
            var cbCalled = false; // i don't trust csvtojson to handle errors properly
            fs.createReadStream('./download/company.csv')
                .once('error',function(err) {
                    // read-stream failure: report it once and stop the series
                    if ( !cbCalled ) {
                        cbCalled = true;
                        callback(err,null);
                    }
                })
                .pipe(new Converter({})) // pipe returns an instance of the Converter object
                .once('end_parsed',function onConverterEnd(jsonObj) {
                    // the step is only complete once the insert has called back
                    collection.insert(jsonObj,function cbCollInsert(err,result) {
                        if ( !cbCalled ) {
                            cbCalled = true;
                            callback(err,result);
                        }
                    });
                });
        },
        function barronsHttpRequest(callback) {
            request({
                uri: 'http://online.barrons.com/news/articles/SB50001424053111904537004580085820431503044?mod=BOL_twm_fs',
            },function cbRequest(err,response,body) {
                if ( err ) {
                    callback(err,null);
                    return; // if err, exit early
                }
                collectionweb.insert({'website':body},function cbCollWebInsert(err,result) {
                    callback(err,result);
                });
            });
        },
        function lastChunkOfCode(callback) {
            // Control flow is deliberately left as in the question: the inner
            // queries and writes are still fire-and-forget and callback() does
            // not wait for them. Only the parse error and the scoping slips
            // have been fixed.
            collectionweb.find({}, function(e, docs1) {
                if ( e ) {
                    callback(e,null);
                    return; // nothing to scan if the query failed
                }
                // forEach gives every page its own `webs`; a `var` inside a
                // for-loop would be shared, so every inner find() callback
                // would see only the last page.
                (docs1 || []).forEach(function eachPage(body) {
                    var webs = body.website;
                    console.log(1);
                    collection.find({}, function(e, docs) {
                        if ( e ) {
                            console.warn('collection.find had a problem!',e);
                            return;
                        }
                        (docs || []).forEach(function eachCompany(companies) {
                            console.log(2);
                            var Score = 0;
                            // NOTE(review): search() treats Symbol as a regular
                            // expression and `> 0` skips a match at index 0;
                            // kept as in the question.
                            var $words = webs.search(companies.Symbol);
                            console.log(3);
                            if ($words > 0) {
                                // was `StockScore++console.log(Score)`: two
                                // statements fused together, incrementing a
                                // variable that was never declared
                                Score++;
                                console.log(Score);
                                collectionhit.insert(companies);
                                collectionhit.update({
                                    "Name": companies.Name
                                }, {
                                    '$set': {
                                        "score": Score
                                    }
                                });
                            }
                        });
                    });
                });
                callback();
            });
        }
    ],function asyncComplete(err,result) {
        // you don't specify how to respond to the request so...
        if ( err ) {
            console.warn('Problem with /parse:',err);
        }
        res.end();
    });
});

I made a crap-ton of assumptions about how your script should work, so it may not be 100% what you want, but the asynchronous concept has been applied. Also, I didn't test this code. You need to determine what can be run in parallel vs. in series, what your control flow should look like, and how you want to handle errors (errors do happen).

Do note that I did not implement async behavior into the last chunk of your script since I couldn't figure out what your collection relationships were -- and I'm not going to do all the work for you. I did notice that it could be optimized a bit. I don't see any reason to select all documents from both collections. You need to offload the selector/query processing onto the database, it shouldn't be in your application if you can help it.

Some take-aways:

  • Collection.remove (doc) accepts a callback, use it.
  • Same with Collection.insert (doc) -- even though the docs say the callback is optional, it should only be omitted in very rare cases. (I don't care if you're using write-concern or not.)
  • Mind your for loops:
      ◦ A for...in should never be used, especially with an array; use a normal for loop or Array.forEach.
      ◦ When using any type of for loop with any async calls -- especially ones related to sockets, i.e. MongoDB -- you need to exercise patience and wait for callbacks, otherwise you'll flood the socket (like a denial-of-service attack). I recommend using async.eachSeries or async.eachLimit.
  • I like to name my Lambdas (oxymoron?), it helps with dissecting a stack trace.
  • Please use either " or ' consistently; they do exactly the same thing, so don't mix them.
  • Develop in smaller chunks. Get one part working, then the next part, then the next one. Work small and abstract.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM