简体   繁体   中英

Match multiple values in MongoDB with one query in pymongo

I have a collection named test containing the documents listed below, from which I need to fetch documents based on the "value" field. For a single value I can easily do this with the query given below.

// Fetch every document in the 'test' collection whose "value" field equals 100.
db.getCollection('test').find({"value" : 100})

But the real problem is that I have a list of "value" fields, such as [100, 104, 200152, ...]. This list can be really long, and I want my result in the format given below in order to reduce the number of Mongo queries, because this is taking too much time: if the list of values is large, I have to perform multiple Mongo queries to fetch all the records.

{100:[
    /* 1 */

    {
        "_id" : "C1",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    },

    /* 2 */
    {
        "_id" : "C2",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    },

    /* 3 */
    {
        "_id" : "C3",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    },

    /* 4 */
    {
        "_id" : "C4",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    },

    /* 5 */
    {
        "_id" : "CO",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    },

    /* 6 */
    {
        "_id" : "DD",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    },

    /* 7 */
    {
        "_id" : "EX",
        "value" : 100,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }],

104:

    [{
        "_id" : "AU",
        "value" : 104,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }],
200152:

    [
    {
        "_id" : "GenFile",
        "value" : 200152,
        "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
        "timetaken" : 3.0
    }
    ]
}

Documents in the DB:

/* 1 */
{
    "_id" : "AU",
    "value" : 104,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 2 */
{
    "_id" : "C1",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 3 */
{
    "_id" : "C2",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 4 */
{
    "_id" : "C3",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 5 */
{
    "_id" : "C4",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 6 */
{
    "_id" : "CO",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 7 */
{
    "_id" : "DD",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 8 */
{
    "_id" : "EX",
    "value" : 100,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 9 */
{
    "_id" : "GS_SEG",
    "value" : 124755350,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

/* 10 */
{
    "_id" : "GenFile",
    "value" : 200152,
    "lastUpdatedTime" : ISODate("2019-11-04T00:00:00.000Z"),
    "timetaken" : 3.0
}

You are searching in the right direction: you will want to avoid having your code spend most of its time on network requests. MongoDB's $in operator (which you can use from pymongo) selects the documents where the value of a field equals any value in the specified array.

In your case it would look like this:

# Set or build a list of the values to match against the "value" field.
list_with_values = [100, 104, 200152]

# Make one call to the DB, asking for all of the matching records.
# `db` is expected to be an existing pymongo Database object. Note that
# pymongo spells the accessor `get_collection` (the mongo shell's
# `getCollection` does not exist in pymongo), and that the `$in` filter
# document needs its own closing brace.
result = db.get_collection('test').find({"value": {"$in": list_with_values}})

Further reference on how the $in operator works: http://docs.mongodb.org/manual/reference/operator/query/in/

You can use the following aggregation to do the work for you. However, the keys for the value fields (100, 104, ...) will be strings instead of numbers (I had to use the $toString operator; otherwise I was getting an error).

// Build a single result document that maps each distinct "value" (as a string
// key) to the array of documents having that value.
// NOTE: there is no $match stage here, so every document in the collection is
// grouped, not only those whose "value" is in a given list.
db.collection.aggregate([
  // Stage 1: group documents by their "value" field, collecting each whole
  // document ($$ROOT) into the "root" array of its group.
  {
    $group: {
      _id: "$value",
      root: {
        $push: "$$ROOT"
      }
    }
  },
  // Stage 2: reshape each group into a { k, v } pair. The group key is
  // converted to a string for "k"; "v" holds the grouped documents.
  {
    $project: {
      k: {
        $toString: "$_id"
      },
      v: "$root",
      _id: 0
    }
  },
  // Stage 3: collapse all { k, v } pairs into one document, stored in array "x".
  {
    $group: {
      _id: null,
      x: {
        $push: "$$ROOT"
      }
    }
  },
  // Stage 4: turn the array of { k, v } pairs into an object keyed by "k".
  {
    $project: {
      _id: 0,
      x: {
        $arrayToObject: "$x"
      }
    }
  },
  // Stage 5: promote that object to be the top-level result document.
  {
    $replaceRoot: {
      newRoot: "$x"
    }
  }
])

Following will be the output:

[
  {
    "1.24755e+08": [
      {
        "_id": "GS_SEG",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 1.2475535e+08
      }
    ],
    "100": [
      {
        "_id": "C1",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "C2",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "C3",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "C4",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "CO",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "DD",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      },
      {
        "_id": "EX",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 100
      }
    ],
    "104": [
      {
        "_id": "AU",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 104
      }
    ],
    "200152": [
      {
        "_id": "GenFile",
        "lastUpdatedTime": ISODate("2019-11-04T00:00:00Z"),
        "timetaken": 3,
        "value": 200152
      }
    ]
  }
]

The technical posts on this site are licensed under CC BY-SA 4.0. If you need to reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM