I have about 30,000 documents in a MongoDB collection. And have been stuck in developing a node.js script to retrieve only the records with a specific string key-value pair.
this query on MongoDB server returns me the exact results I've been looking for:
db.getCollection('posts').find({authorName: "Ashwin-kumar"})
Returns me about 33 documents instantly. Likewise I've about 40 authors with different names.
Here's my node.js script to retrieve posts by authorName (Yes, it is based on Name, a string, as there is no ID for these authors :( ):
var fs = require('fs'),
request = require('request'),
async = require("async"),
assert = require('assert');
_ = require('lodash'),
MongoClient = require('mongodb').MongoClient;
var db, postsCollection, postCol;
async.series([dbConnect, checkCollection, createMeta, dbClose], function(){
console.log("Executed all calls in series.");
process.exit(0);
});
function dbConnect(callback){
MongoClient.connect("mongodb://localhost:27017/jPosts", function(pErr, pDb) {
if(pErr) {
console.dir(pDb);
return 0;
}
db = pDb;
callback();
});
}
function dbClose(callback){
db.close(true, function (err) {
if (err) console.error(err);
else console.log("close complete");
callback();
});
}
function checkCollection(callback) {
db.collection('posts', function(err, collection) {});
postsCollection = db.collection('posts');
postCol = db.collection('posts');
callback();
}
function createMeta(callback){
var meta = [];
postsCollection.aggregate([
{
$group : {_id : "$authorName"}
}]).toArray(function(err, result) {
assert.equal(err, null);
async.forEachLimit(result, 1, function(pPost, callback) {
getPosts(pPost._id, callback);
}, function(err) {
console.log(err);
callback();
});
});
}
function getPosts(pAuthor, callback){
var cursor = postCol.find({ "authorName": pAuthor});
cursor.toArray(function(err,items){
if(err)
callback(err);
else
callback(null, items);
});
}
This does not seem to work for me. cursor.toArray() does nothing but wait forever. Is it because of too many fields in each document?
I tried to get the count of the documents the cursor fetched and it works well.
function getPosts(pAuthor, callback){
var cursor = postCol.find({ "authourName": pAuthor});
cursor.count().then(function(items_count) {
console.log(items_count);
callback();
});
}
Also, I tried the cursor's .each method to iterate the documents fetched. But no luck yet.
function getPosts(pAuthor, callback){
var cursor = postCol.find({ "authourName": pAuthor});
cursor.each(function(err, doc) {
assert.equal(err, null);
if (doc != null) {
console.dir(doc);
} else {
console.log(err);
}
});
}
Am I missing something here? What else can be done to make this work? Is there any issues with the way I'm using async?
PS: The idea here is to query the dump and generate the PDF's for authours in the jPost collection.
PS 2: Here's a sample document
{
"_id" : ObjectId("571d36b55672f713fe346a66"),
"id" : 56517,
"authorName" : "Ashwin-kumar",
"comment_count" : 380,
"tagline" : "... Opinions you don't really need",
"vote_count" : 5152,
"exclusive" : null,
"post": [
],
"post_comments" : [
//comment_count objects
],
"date" : "2016-03-27"
}
(I've omitted post & post_comments parts for brevity.)
try this:
var collection = db.collection("collection_name");
collection.find({authourName: "Ashwin-kumar"}).toArray(function (err,items) {
if (err) {
console.dir(err);
} else {
//do something with items array
console.dir(items);
}
});
Did you check what is the value of pAuthor in getPosts? Because when you do aggregation, you receive a collection of objects with _id field (not authourName), so you should do:
// not sure why you need meta array, at least it's not used in the code you provided
meta.push({
author: pPost._id
});
getPosts(pPost._id, callback);
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.