
Reading a file line by line, parsing it, and inserting the records into MongoDB in Node.js

I have a tab-separated file with thousands of rows. How can I use Node.js to read the file line by line, parse each line into an object, and insert those objects into a MongoDB database?

I am just learning Node and Mongo, and I come from a different background, so how can this be done?

In the end, the MongoDB database has to be populated with the proper data. I searched the net but could not find a complete solution.

Thanks.

I had an issue with the answer by Juvenik. My problem was that the database would not be fully populated by the time readline had completed: the lines were being read synchronously, but the DB insertion was asynchronous.

Instead, I found a simpler solution with the line-reader package. It reads each line and waits for your callback before continuing to the next one.

var MongoClient = require('mongodb').MongoClient
var lineReader = require('line-reader')

var dbName = 'yourDbName'
var url = 'mongodb://localhost:27017/' + dbName
var collectionName = 'yourCollectionName'
var filename = 'yourFileName.txt'

var printLine = 1000 // log progress every 1000 lines

MongoClient.connect(url, function (err, db) {
    if (err) {
        console.error('Problem connecting to database')
    } else {
        console.log('Connected correctly to server.')

        var collection = db.collection(collectionName)
        var lineNum = -1
        var headers = []

        lineReader.eachLine(filename, function (line, last, cb) {
            lineNum++
            try {
                var split = line.split('\t')
                var object = {}

                if (lineNum > 0) {
                    // data row: map each column value onto its header name
                    for (var i = 0; i < split.length; i += 1) {
                        object[headers[i]] = split[i]
                    }

                    collection.insert(object, function (insertErr, insertObj) {
                        if (insertErr) console.error(insertErr)
                        if (lineNum % printLine === 0) console.log('Line ' + lineNum)
                        if (last) {
                            console.log('Done with ' + filename + ' (' + lineNum + ' records)')
                            process.exit(0)
                        } else {
                            // only ask line-reader for the next line once this insert has finished
                            cb()
                        }
                    })

                } else {
                    // first line: remember the column headers
                    headers = line.split('\t')
                    cb()
                }

            } catch (lineError) {
                console.error(lineError)
            }
        })
    }
})
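
To illustrate what this produces, suppose yourFileName.txt contains a header row followed by tab-separated rows (hypothetical data):

name	age	city
Alice	30	Paris
Bob	25	Lyon

The first line becomes the headers array, and each subsequent line is stored as a document such as { name: 'Alice', age: '30', city: 'Paris' }. Note that every value is stored as a string; convert types inside the loop if you need numbers or dates.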

I came across a similar problem. This approach worked for me. Have a look, it might be helpful.

var mongoDb         = require('mongodb');
var mongoClient     = mongoDb.MongoClient;
var dbname          = 'YOUR_DB_NAME';
var collectionName  = 'YOUR_COLLECTION_NAME';
var url             = 'mongodb://localhost:27017/' + dbname;
var filename        = 'File_Name.txt';
console.log('***************Process started');

mongoClient.connect(url, function (err, db) {
    if (err) {
        console.log('error on connection ' + err);
    }
    else {
        console.log('***************Successfully connected to mongodb');
        var collection  = db.collection(collectionName);
        var fs          = require('fs');
        var readline    = require('readline');
        var instream    = fs.createReadStream(filename);
        var rl          = readline.createInterface({ input: instream });

        console.log('***************Parsing, please wait ...');

        rl.on('line', function (line) {
            try {
                var arr    = line.split('\t');
                var object = {};
                //Parse them here
                //Example
                object['name'] = arr[0]; //Just an example
                // note: this insert is fire-and-forget, so inserts may still
                // be in flight when the 'close' handler below runs
                var res = collection.insert(object);
            }
            catch (err) {
                console.log(err);
            }
        });

        rl.on('close', function () {
            db.close();
            console.log('***************completed');
        });
    }
});

I am a learner too. If someone can improve on this, that would be great.
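
One way to address the race between reading and inserting (the issue raised in the first answer) is to collect the insert promises and wait for them all before closing the connection. A minimal sketch, assuming the 2.x driver, whose insertOne returns a promise when called without a callback:

var pending = [];

rl.on('line', function (line) {
    var arr = line.split('\t');
    var object = { name: arr[0] }; // hypothetical mapping, as in the example above
    // insertOne returns a promise when no callback is supplied
    pending.push(collection.insertOne(object));
});

rl.on('close', function () {
    // only close the connection once every insert has settled
    Promise.all(pending)
        .then(function () { console.log('***************completed'); })
        .catch(function (err) { console.log(err); })
        .then(function () { db.close(); });
});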

Here is a more performant (it inserts objects in batches) and updated version (using async/await and the latest mongo driver) of frank-0's answer:

const lineReader = require('line-reader');

// `collection` is an already-connected MongoDB collection (see the usage
// sketch below); the original snippet referenced it without defining it
async function readFileAndInsertInMongo(file, collection) {
    let total = 0;

    return new Promise((resolve, reject) => {
        let buffer = [];
        lineReader.eachLine(file, (line, last, cb) => {
            // prepare your object based on the line content
            let insertObject = {'some_content': 'some_value'};
            buffer.push(insertObject);
            total++;

            // flush the buffer every 10000 lines, and on the last line
            if (buffer.length === 10000 || last) {
                collection.insertMany(buffer, function (err, res) {
                    if (err) {
                        return reject(err);
                    }
                    if (last) {
                        resolve(total);
                    } else {
                        buffer = [];
                        cb();
                    }
                });
            } else {
                cb();
            }
        });
    });
}

This really is the best solution I have found for parsing huge files and inserting them into the database without exhausting Node's memory. Hope this helps ;)
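
For completeness, a usage sketch with the 3.x driver, where MongoClient.connect resolves to a client; the connection URL, database, collection, and file names are placeholders to adjust for your setup:

const { MongoClient } = require('mongodb');

async function main() {
    // placeholder connection details; adjust for your setup
    const client = await MongoClient.connect('mongodb://localhost:27017');
    const collection = client.db('yourDbName').collection('yourCollectionName');
    try {
        const total = await readFileAndInsertInMongo('yourFileName.txt', collection);
        console.log('Inserted ' + total + ' records');
    } finally {
        await client.close();
    }
}

main().catch(console.error);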
