简体   繁体   中英

MongoDB update nested array foreach

I have a collection of Users, and each user has an array, Ancestors. A previous developer designed the DB schema incorrectly, so each ancestor is now stored as a string when it should be an ObjectId . Each string still contains the object id (in fact, the hex form of the ObjectId, like 558470744a73274db0f0d65d ). How can I convert each ancestor to an ObjectId? I wrote this:

// Walks every document in the Users collection and, when Ancestors is an
// array, replaces it with a new array built from the elements that pass the
// `instanceof String` check (each wrapped in ObjectId), then saves the document.
db.getCollection('Users').find({}).forEach(function(item){
  // Documents whose Ancestors is not an array (e.g. null) are left untouched.
  if (item.Ancestors instanceof Array){
      var tmp = new Array()
      item.Ancestors.forEach(function(ancestor){
          // NOTE(review): `instanceof String` is true only for String wrapper
          // objects, not for primitive strings, so primitive string values are
          // skipped here and never reach tmp. `typeof ancestor === 'string'`
          // is the check that matches primitives - confirm against the data.
          if (ancestor instanceof String){
               tmp.push(ObjectId(ancestor))
             }
          })
          // Elements that failed the check above are not copied, so they are
          // dropped from the saved array.
          item.Ancestors = tmp
          db.getCollection('Users').save(item) 
  }
})

But it looks like it does not work properly: some of the ancestors are now ObjectId, and some are null . Also, Ancestors can be null from the start, which is why I added all those if checks.

Try like this using mongoose,

var mongoose = require('mongoose');

// Convert the string entries of every user's Ancestors array into ObjectIds.
// Entries that are not 24-character hex strings (already-converted ObjectIds,
// nulls, malformed values) are kept unchanged, so nothing is silently dropped
// and the script can be re-run safely.
db.getCollection('Users').find({}).forEach(function (item) {
  // Ancestors may be null or missing; only arrays are rewritten.
  if (!Array.isArray(item.Ancestors)) {
    return;
  }
  item.Ancestors = item.Ancestors.map(function (ancestor) {
    // Strings read from the database are normally primitives, for which
    // `instanceof String` is false; `typeof` is the check that matches them.
    if (typeof ancestor === 'string' && /^[0-9a-fA-F]{24}$/.test(ancestor)) {
      return new mongoose.Types.ObjectId(ancestor);
    }
    return ancestor;
  });
  db.getCollection('Users').save(item);
});

The solution concept here is to loop through your collection with a cursor and for each document within the cursor, gather data about the index position of the Ancestors array elements.

You will then use this data later on in the loop as the update operation parameters to correctly identify the elements to update.

Supposing your collection is not that humongous, the intuition above can be implemented using the forEach() method of the cursor as you have done in your attempts to do the iteration and getting the index data for all the array elements involved.

The following demonstrates this approach for small datasets:

/**
 * Tell whether a value is the 24-character hexadecimal form of an ObjectId.
 *
 * @param {*} id - Candidate value; anything that is not a string yields false.
 * @returns {boolean} true only for a string made of exactly 24 hex digits.
 */
function isValidHexStr(id) {
    // Arbitrary 12-character strings are deliberately rejected: they are not
    // hex, and passing one to the shell's ObjectId() would raise an error and
    // abort the surrounding migration loop.
    return typeof id === "string" && /^[0-9a-fA-F]{24}$/.test(id);
}


// Convert string ancestors to ObjectIds, one update per document.
// The filter selects documents whose first Ancestors element exists and is a
// string ($type 2). Relies on an isValidHexStr(value) helper being defined.
db.users.find({"Ancestors.0": { "$exists": true, "$type": 2 }}).forEach(function(doc){
    var ancestors = doc.Ancestors,
        updateOperatorDocument = {},
        hasUpdates = false;
    for (var idx = 0; idx < ancestors.length; idx++){
        if (isValidHexStr(ancestors[idx])) {
            // "Ancestors.<idx>" addresses the array element by position, so
            // only the elements that qualify are overwritten.
            updateOperatorDocument["Ancestors." + idx] = ObjectId(ancestors[idx]);
            hasUpdates = true;
        }
    }
    // Servers older than 5.0 reject a $set with an empty operand, which would
    // abort the whole loop; skip documents that have nothing to convert.
    if (hasUpdates) {
        db.users.updateOne(
            { "_id": doc._id },
            { "$set": updateOperatorDocument }
        );
    }
});

Now, for improved performance, especially when dealing with large collections, take advantage of the Bulk() API to update the collection in bulk. This is more efficient than the operations above because with the bulk API you send the operations to the server in batches (for example, a batch size of 1000). That gives much better performance, since you are not sending a request to the server for every document but only once per 1000 operations, making your updates quicker.

The following examples demonstrate using the Bulk() API available in MongoDB versions >= 2.6 and < 3.2 .

/**
 * Tell whether a value is the 24-character hexadecimal form of an ObjectId.
 *
 * @param {*} id - Candidate value; anything that is not a string yields false.
 * @returns {boolean} true only for a string made of exactly 24 hex digits.
 */
function isValidHexStr(id) {
    // Arbitrary 12-character strings are deliberately rejected: they are not
    // hex, and passing one to the shell's ObjectId() would raise an error and
    // abort the surrounding migration loop.
    return typeof id === "string" && /^[0-9a-fA-F]{24}$/.test(id);
}

// Convert string ancestors to ObjectIds using the Bulk() API, sending the
// queued updates to the server in batches of 1000.
// Relies on an isValidHexStr(value) helper being defined.
// The method name is initializeUnorderedBulkOp (lower-case "o" in "Unordered");
// JavaScript is case-sensitive, so any other spelling is not a function.
var bulkUpdateOps = db.users.initializeUnorderedBulkOp(),
    counter = 0; // number of operations queued so far

db.users.find({"Ancestors.0": { "$exists": true, "$type": 2 }}).forEach(function(doc){
    var ancestors = doc.Ancestors,
        updateOperatorDocument = {},
        hasUpdates = false;
    for (var idx = 0; idx < ancestors.length; idx++){
        if (isValidHexStr(ancestors[idx])) {
            // "Ancestors.<idx>" addresses the array element by position.
            updateOperatorDocument["Ancestors." + idx] = ObjectId(ancestors[idx]);
            hasUpdates = true;
        }
    }
    // An empty $set is rejected by servers older than 5.0, so documents with
    // nothing to convert are not queued (and not counted).
    if (!hasUpdates) {
        return;
    }
    bulkUpdateOps.find({ "_id": doc._id }).update({ "$set": updateOperatorDocument });

    counter++;  // increment counter for batch limit
    if (counter % 1000 === 0) {
        // execute the bulk update operation in batches of 1000
        bulkUpdateOps.execute();
        // Re-initialize the bulk update operations object
        bulkUpdateOps = db.users.initializeUnorderedBulkOp();
    }
});

// Flush the operations still queued. Because counter tracks queued operations
// only, a non-zero remainder means the current batch is not empty.
if (counter % 1000 !== 0) { bulkUpdateOps.execute(); }

The next example applies to the new MongoDB version 3.2 which has since deprecated the Bulk() API and provided a newer set of apis using bulkWrite() .

It uses the same cursor as above, and the same forEach() cursor method, to push each bulk write document onto an array of bulk operations. Because write commands can accept no more than 1000 operations, you need to group your operations into batches of at most 1000 and re-initialise the array whenever it reaches 1000 entries:

// Convert string ancestors to ObjectIds using bulkWrite(), sending the
// collected updateOne operations in batches of 1000.
// Relies on an isValidHexStr(value) helper being defined.
var cursor = db.users.find({"Ancestors.0": { "$exists": true, "$type": 2 }}),
    bulkUpdateOps = [];

cursor.forEach(function(doc){
    var ancestors = doc.Ancestors,
        updateOperatorDocument = {},
        hasUpdates = false;
    for (var idx = 0; idx < ancestors.length; idx++){
        if (isValidHexStr(ancestors[idx])) {
            // "Ancestors.<idx>" addresses the array element by position.
            updateOperatorDocument["Ancestors." + idx] = ObjectId(ancestors[idx]);
            hasUpdates = true;
        }
    }
    // An empty $set is rejected by servers older than 5.0 and would fail the
    // batch, so documents with nothing to convert are not queued.
    if (!hasUpdates) {
        return;
    }
    bulkUpdateOps.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": updateOperatorDocument }
        }
    });

    // Send a full batch and start collecting the next one.
    if (bulkUpdateOps.length === 1000) {
        db.users.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});

// Send whatever is left over from the last, partially filled batch.
if (bulkUpdateOps.length > 0) { db.users.bulkWrite(bulkUpdateOps); }

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM