简体   繁体   中英

MongoDB Aggregation Framework group and merge with the nested document in arrays

Use Case: I have the multiple documents with nested documents array(PODDetails) in the collection, need a result group by(WTID field) and the merge of an array(PODDetails) into single documents(except PODDetails other details are same).

Below is document in the collection:

    {
        "_id": "180910eb-4670-4ccb-ac89-0e993b050105",
        "WTID": "WT1389",
        "TDLNumber": "002",
        "POD": "SW 35-44-07-W5",
        "PODDetails": [
            {
                "LoadStartDate": "2019-10-18",
                "LoadStartTime": "17:37:54",
                "LoadStopDate": "2019-10-18",
                "LoadStopTime": "17:37:54",
                "Volume": 15,
                "VolUnit": "m3"
            }
        ],
        "Status": "Active",
        "createdon": {
            "$date": "2020-03-20T17:37:54.000Z"
        }
    },
    {
        "_id": "5a4d3ee0-83e3-40df-a3a3-28f8c7560106",
        "WTID": "WT1389",
        "TDLNumber": "002",
        "POD": "SW 35-44-07-W5",
        "PODDetails": [
            {
                "LoadStartDate": "2019-10-18",
                "LoadStartTime": "17:38:33",
                "LoadStopDate": "2019-10-18",
                "LoadStopTime": "17:38:33",
                "Volume": 25,
                "VolUnit": "m3"
            }
        ],
        "Status": "Active",
        "createdon": {
            "$date": "2020-03-20T18:55:15.000Z"
        }
    },
    {
        "_id": "180910eb-4670-4ccb-ac89-0e993b050107",
        "WTID": "WT1390",
        "TDLNumber": "002",
        "POD": "SW 35-44-07-W5",
        "PODDetails": [
            {
                "LoadStartDate": "2019-10-18",
                "LoadStartTime": "17:37:54",
                "LoadStopDate": "2019-10-18",
                "LoadStopTime": "17:37:54",
                "Volume": 15,
                "VolUnit": "m3"
            }
        ],
        "Status": "Active",
        "createdon": {
            "$date": "2020-03-20T17:37:54.000Z"
        }
    }

The result should be like below, based on "WTID" field

{
    "_id": "180910eb-4670-4ccb-ac89-0e993b050105",
    "WTID": "WT1389",
    "TDLNumber": "002",
    "POD": "SW 35-44-07-W5",
    "PODDetails": [
        {
            "LoadStartDate": "2019-10-18",
            "LoadStartTime": "17:37:54",
            "LoadStopDate": "2019-10-18",
            "LoadStopTime": "17:37:54",
            "Volume": 15,
            "VolUnit": "m3"
        },
        {
            "LoadStartDate": "2019-10-18",
            "LoadStartTime": "17:38:33",
            "LoadStopDate": "2019-10-18",
            "LoadStopTime": "17:38:33",
            "Volume": 25,
            "VolUnit": "m3"
        }
    ],
    "createdon": {
        "$date": "2020-03-20T17:37:54.000Z"
    }
},
{
    "_id": "180910eb-4670-4ccb-ac89-0e993b050107",
    "WTID": "WT1390",
    "TDLNumber": "002",
    "POD": "SW 35-44-07-W5",
    "PODDetails": [
        {
            "LoadStartDate": "2019-10-18",
            "LoadStartTime": "17:37:54",
            "LoadStopDate": "2019-10-18",
            "LoadStopTime": "17:37:54",
            "Volume": 15,
            "VolUnit": "m3"
        }
    ],
    "createdon": {
        "$date": "2020-03-20T17:37:54.000Z"
    }
}

Below is what I tried'... I need to fetch the document only for the current date(ie today)

{
    aggregate([{
        "$addFields": {
            "CreatedOnDate": {
                "$dateToString": {
                    "format": "%Y-%m-%d",
                    "date": {
                        "$add": ["$createdon", 18000000]
                    }
                }
            }
        }
    }, {
        "$match": {
            "CreatedOnDate": {
                "$gte": "2020-03-20",
                "$lte": "2020-03-20"
            },
            "Status": {
                "$eq": "Active"
            }
        }
    }, {
        "$unwind": "$PODDetails"
    }, {
        "$group": {
            "WaterTrackingID": "$WaterTrackingID",
            "POD": {
                "$addToSet": "$PODDetails"
            },
            "data": {
                "$first": "$$ROOT"
            }
        }
    }, {
        "$project": {
            "TDLNumber": "$data.TDLNumber",
            "PointOfDiversion": "$data.PointOfDiversion",
            "POD": 1,
            "CreatedOnDate": "$data.CreatedOnDate"
        }
    }])
}

However, this query isn't working...

What's going wrong here. Any help would be appreciated!!

Many things are going wrong here in your aggregation query!

  1. Why are you adding 5 hours to your date?
  2. In your match stage, $lte + $gte = $eq
  3. In your match stage, Status does not exist
  4. In your $unwind stage, PointOfDiversionVolumeDetails does not exist.
  5. Your $group stage doesn't provide any _id nor accumulator (except for data), WaterTrackingID does not exist. ...

Here's a query which seems to achieve what you want :

db.collection.aggregate([
  {
    $match: {
      $expr: {
        $eq: [
          {
            $dateToString: {
              date: "$createdon",
              format: "%Y-%m-%d"
            }
          },
          "2020-03-20"
        ]
      },

    }
  },
  {
    $group: {
      _id: "$WTID",
      POD: {
        $first: "$POD"
      },
      PODDetails: {
        $push: 
          {
            $arrayElemAt: [
            "$PODDetails",
            0
            ]
          }
      },
      TDLNumber: {
        $first: "$TDLNumber"
      },
      createdon: {
        $first: {
          $dateToString: {
            date: "$createdon",
            format: "%Y-%m-%d"
          }
        },

      },

    }
  }
])

will output

[
  {
    "POD": "SW 35-44-07-W5",
    "PODDetails": [

        {
          "LoadStartDate": "2019-10-18",
          "LoadStartTime": "17:37:54",
          "LoadStopDate": "2019-10-18",
          "LoadStopTime": "17:37:54",
          "VolUnit": "m3",
          "Volume": 15
        }
      ,

        {
          "LoadStartDate": "2019-10-18",
          "LoadStartTime": "17:38:33",
          "LoadStopDate": "2019-10-18",
          "LoadStopTime": "17:38:33",
          "VolUnit": "m3",
          "Volume": 25
        }

    ],
    "TDLNumber": "002",
    "_id": "WT1389",
    "createdon": "2020-03-20"
  },
  {
    "POD": "SW 35-44-07-W5",
    "PODDetails": [

        {
          "LoadStartDate": "2019-10-18",
          "LoadStartTime": "17:37:54",
          "LoadStopDate": "2019-10-18",
          "LoadStopTime": "17:37:54",
          "VolUnit": "m3",
          "Volume": 15
        }

    ],
    "TDLNumber": "002",
    "_id": "WT1390",
    "createdon": "2020-03-20"
  }
]

Here's my workaround based on @matthPen's answer

db.collection.aggregate(
[
    {$match:{Status:"Active"}},
    {$match: {
      $expr: {
        $eq: [
          {
            $dateToString: {
              date: "$createdon",
              format: "%Y-%m-%d"
            }
          },
          "2020-03-20"
        ]
      },
    }},
    {$group : {_id : "$WTID", PODDetails : {$push : "$PODDetails"},
        TDLNumber: {$first: "$TDLNumber"}, POD: {$first: "$POD"}        
    }},
    {$unwind : "$PODDetails"}, 
    {$unwind : "$PODDetails"}, 
    {$group : { _id : "$_id", PODDetails : {$addToSet : "$PODDetails"},
        TDLNumber: {$first: "$TDLNumber"}, POD: {$first: "$POD"}
    }},
    {$project:{WaterTrackingID:"$_id", TDLNumber:"$TDLNumber", POD:"$POD", PODDetails:"$PODDetails"}}

])

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM