简体   繁体   中英

How to perform mass inserts into mongodb using NodeJS

I have to insert about 1,000,000 documents into MongoDB using Node.js.

I'm generating these documents using a for loop storing them into an array before finally inserting them into mongodb.

// Generate one million promo-code documents and insert them in fixed-size
// batches, so no single request approaches MongoDB's 16MB message limit.
// NOTE(review): `strNpm`, `moment`, and `db` are assumed to be initialized
// earlier in the file — confirm before running.
var BATCH_SIZE = 1000;  // documents per insert request
var codeArray = [];
for (var i = 0; i < 1000000; i++) {
    var token = strNpm.generate();
    var now = moment().format('YYYYMMDD hhmmss');
    codeArray.push({
        id: token,
        Discount_strId: "pending",
        Promotion_strCode: token,
        Promotion_strStatus: "I",
        Promotion_dtmGeneratedDate: now,
        User_strLogin: "test",
        Promotion_strMode: "S",
        Promotion_dtmValidFrom: "pending",
        Promotion_dtmValidTill: "pending",
        LastModified_dtmStamp: now
    });

    // Drain the buffer once per batch. The original called insert() inside
    // the loop with the entire growing array (and never closed the loop),
    // re-sending every earlier document on each iteration and eventually
    // exceeding the 16MB request limit.
    if (codeArray.length === BATCH_SIZE) {
        db.collection('ClPromoCodeMaster').insert(codeArray, function (err, result) {
            if (err) {
                console.log(err);
            } else {
                console.log('Inserted Records - ', result.ops.length);
            }
        });
        codeArray = [];  // start a fresh buffer; the old one is owned by the driver now
    }
}

The problem I'm facing is that MongoDB has a 16MB limit per insert request, so I can't insert the entire array at once. Please suggest the most optimal solution.

The main problem is in the request size and not the document size, but it amounts to the same limitation. Bulk operations and the async library with async.whilst will handle this:

// Queue documents on an ordered bulk operation and drain it to the server
// every BATCH documents, keeping each request well under the 16MB limit.
var COUNT = 1000000,
    BATCH = 1000,  // matches the driver's internal per-batch cap of 1000 ops
    bulk = db.collection('ClPromoCodeMaster').initializeOrderedBulkOp(),
    queued = 0,    // ops added since the last execute()
    i = 0;

async.whilst(
  // Keep looping until COUNT documents have been queued.
  function() { return i < COUNT; },
  function(callback) {
    var token = strNpm.generate();
    var now = moment().format('YYYYMMDD hhmmss');
    bulk.insert({
      id: token,
      Discount_strId: "pending",
      Promotion_strCode: token,
      Promotion_strStatus: "I",
      Promotion_dtmGeneratedDate: now,
      User_strLogin: "test",
      Promotion_strMode: "S",
      Promotion_dtmValidFrom: "pending",
      Promotion_dtmValidTill: "pending",
      LastModified_dtmStamp: now
    });
    i++;
    queued++;

    // Drain every BATCH documents.
    if (queued === BATCH) {
      bulk.execute(function(err, response) {
        // A bulk op cannot be reused after execute(); start a fresh one.
        bulk = db.collection('ClPromoCodeMaster').initializeOrderedBulkOp();
        queued = 0;
        callback(err);
      });
    } else {
      callback();
    }
  },
  function(err) {
    if (err) throw err;
    // Flush any remainder. The original never did this, silently dropping
    // the final partial batch whenever COUNT is not a multiple of BATCH
    // (harmless at exactly 1,000,000, but a latent bug for other totals).
    if (queued > 0) {
      bulk.execute(function(err) {
        if (err) throw err;
        console.log("done");
      });
    } else {
      console.log("done");
    }
  }
);

I should note that, regardless, there is an internal limit of 1000 operations per batch on bulk operations. You can submit larger sizes, but the driver will just break these up and still submit them in batches of 1000.

The 1000 is a good number to stay at though, since it is already in line with how the request will be handled, as well as being a reasonable number of things to hold in memory before draining the request queue and sending to the server.

For inserting millions of records at a time, create Node.js child processes with fork, combined with the MongoDB bulk API.

Child Process Creation:(index.js)

const {fork} = require("child_process");
let counter = 1;

// Fork a dbOperation worker, hand it one batch descriptor, and log each
// acknowledgement the worker sends back (numbered via the shared counter).
function createProcess(data) {
    const worker = fork("./dbOperation");
    worker.send(data);
    worker.on("message", function (msg) {
        console.log("Worker Message :", counter, msg);
        counter += 1;
    });
}

/**
 * Split the total record count into batches and fork one child process
 * per batch.
 * @param {number} records - total number of records to insert.
 * @param {number} [singleBatchCount=10000] - records handled per child
 *   (generalized from the original hard-coded constant; default preserves
 *   the old behavior).
 */
function bulkSaveUser(records, singleBatchCount = 10000) {
    const noOfProcess = Math.ceil(records / singleBatchCount);
    console.log("No of Process :", noOfProcess);
    for (let index = 1; index <= noOfProcess; index++) {
        // Build a FRESH object per batch. The original mutated and re-sent
        // one shared `data` object; any consumer reading it asynchronously
        // would observe only the final batch's bounds.
        // (The original's `index == 1 ? index : ...` special case is
        // redundant: (1-1)*n + 1 === 1.)
        createProcess({
            startCount: (index - 1) * singleBatchCount + 1,
            endCount: index * singleBatchCount,
        });
    }
}


bulkSaveUser(1500000);

DB Operation (dbOperation.js)

const MongoClient = require('mongodb').MongoClient;
// Collection Name — must be filled in before running.
// NOTE(review): the code below reads `colName`, not `collectionName`;
// these look like they were meant to be the same identifier — confirm.
const collectionName = ""; 
// DB Connection String — must be filled in before running.
const connString = "";

// Each forked worker receives its batch bounds from the parent process and
// immediately starts generating/inserting that range of student records.
process.on("message", function (msg) {
    console.log("Initialize Child Process", msg);
    inputStudents(msg.startCount, msg.endCount);
});

/**
 * Open a MongoDB connection.
 * @returns {Promise} resolves with the connected client, rejects on error.
 */
function initConnection() {
    return new Promise(function (resolve, reject) {
        MongoClient.connect(connString, function (err, db) {
            // The original called resolve(db) even after rejecting (missing
            // else/return); settle the promise exactly once.
            if (err) {
                reject(err);
            } else {
                resolve(db);
            }
        });
    });
}

/**
 * Generate fake score documents for student ids in [startCount, endCount]
 * and bulk-insert them into the "student" database, then notify the parent
 * process via process.send.
 * @param {number} startCount - first student id (inclusive).
 * @param {number} endCount - last student id (inclusive).
 */
function inputStudents(startCount, endCount) {
    // Hoisted out of the loop: this array never changes per iteration.
    const types = ['exam', 'quiz', 'homework', 'homework'];
    const bulkData = [];
    for (let index = startCount; index <= endCount; index++) {
        // Each student gets one grade per entry in `types`.
        // (Original used implicit globals `j`, `class_id`, `record`.)
        const scores = [];
        for (let j = 0; j < types.length; j++) {
            scores.push({ 'type': types[j], 'score': Math.random() * 100 });
        }
        // There are 500 different classes; id is between 0 and 500 inclusive.
        const classId = Math.floor(Math.random() * 501);
        bulkData.push({
            insertOne: {
                "document": { 'student_id': index, 'scores': scores, 'class_id': classId }
            }
        });
    }
    initConnection()
        .then(function (db) {
            const studentDb = db.db("student");
            // Bug fix: original referenced undefined `colName` (ReferenceError);
            // the module-level constant is `collectionName`.
            const collection = studentDb.collection(collectionName);
            console.log("Bulk Data :", bulkData.length);
            collection.bulkWrite(bulkData, function (err, res) {
                if (err) throw err;
                process.send("Saved Successfully");
                db.close();
            });
        })
        .catch(function (err) { console.log("Err :", err); });
}

Sample project to insert millions of record in mongodb using child process fork

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM