繁体   English   中英

优化 MongoDB 聚合管道(Group、Lookup、Match)

[英]Optimizing MongoDB Aggregation Pipeline (Group, Lookup, Match)

我是 NoSQL 数据库的新手,我选择 MongoDB 作为我的第一个 NoSQL 数据库。 我制作了一个聚合管道来显示我想要的数据,这是我的文档示例:

来自用户集合的文档样本

{
    "_id": 9,
    "name": "Sample Name",
    "email": "email@example.com",
    "password": "password hash"
}

来自 Pages Collection 的文档示例(这个并不重要)

{
    "_id": 42,
    "name": "Product Name",
    "description": "Product Description",
    "user_id": 8,
    "rating_categories": [{
        "_id": 114,
        "name": "Build Quality"
    }, {
        "_id": 115,
        "name": "Price"
    }, {
        "_id": 116,
        "name": "Feature"
    }, {
        "_id": 117,
        "name": "Comfort"
    }, {
        "_id": 118,
        "name": "Switch"
    }]
}

来自 Reviews Collection 的文档样本

{
    "_id": 10,
    "page_id": 42, #ID reference from pages collection
    "user_id": 8, #ID reference from users collection
    "review": "The review of the product",
    "ratings": [{
        "_id": 114, #ID Reference from pages collection of what rating category it is
        "rating": 5
    }, {
        "_id": 115,
        "rating":4
    }, {
        "_id": 116,
        "rating": 5
    }, {
        "_id": 117,
        "rating": 3
    }, {
        "_id": 118,
        "rating": 4
    }],
    "created": "1582825968963", #Date Object
    "votes": {
        "downvotes": [],
        "upvotes": [9] #IDs of users who upvote this review
    }
}

我想通过 page_id 获得评论,可以从我制作的 API 访问它,这是聚合的预期结果:

[
  {
    "_id": 10, #Review of the ID
    "created": "Thu, 27 Feb 2020 17:52:48 GMT",
    "downvote_count": 0, #Length of votes.downvotes from reviews collection
    "page_id": 42, #Page ID
    "ratings": [ #Stores what rate at what rating category id
      {
        "_id": 114,
        "rating": 5
      },
      {
        "_id": 115,
        "rating": 4
      },
      {
        "_id": 116,
        "rating": 5
      },
      {
        "_id": 117,
        "rating": 3
      },
      {
        "_id": 118,
        "rating": 4
      }
    ],
    "review": "The Review",
    "upvote_count": 0, #Length of votes.upvotes from reviews collection
    "user": { #User who reviewed
      "_id": 8, #User ID
      "downvote_count": 0, #How many downvotes this user receive from all of the user's reviews
      "name": "Sample Name", #Username
      "review_count": 1, #How many reviews the user made
      "upvote_count": 1 #How many upvotes this user receive from all of the user's reviews
    },
    "vote_state": 0 #Determining vote state from the user (who requested to the API) for this review, 0 for no vote, -1 for downvote, 1 for upvote
  },
  ...
]

这是我为上述结果制作的评论集合聚合管道:

# Request parameters: the user calling the API and the page whose reviews are listed.
user_id = 9
page_id = 42

# Aggregation over the reviews collection. For one page it yields one document
# per reviewer (that reviewer's latest review), enriched with the reviewer's
# profile, the reviewer's totals across all of their reviews, and the requesting
# user's vote state on the review.
pipeline = [
    # Filter first: every later stage then only processes this page's reviews,
    # and a leading $match is the stage that can make use of an index on page_id.
    {"$match": {"page_id": page_id}},
    # $last is only meaningful on ordered input; oldest -> newest makes the
    # $last accumulators below pick the most recent review of each user.
    {"$sort": {"created": 1}},
    {"$group": {
        "_id": {"user_id": "$user_id", "page_id": "$page_id"},
        "review_id": {"$last": "$_id"},
        "page_id": {"$last": "$page_id"},
        "user_id": {"$last": "$user_id"},
        "ratings": {"$last": "$ratings"},
        "review": {"$last": "$review"},
        "created": {"$last": "$created"},
        "votes": {"$last": "$votes"},
        # votes.upvotes / votes.downvotes may be absent until someone votes;
        # $ifNull substitutes an empty array so $size yields 0 instead of erroring.
        "upvote_count": {"$sum": {"$size": {"$ifNull": ["$votes.upvotes", []]}}},
        "downvote_count": {"$sum": {"$size": {"$ifNull": ["$votes.downvotes", []]}}},
    }},
    # Attach the reviewer's user document.
    {"$lookup": {
        "from": "users",
        "localField": "user_id",
        "foreignField": "_id",
        "as": "user",
    }},
    {"$unwind": "$user"},
    # Attach every review written by that user (needed for the per-user totals).
    {"$lookup": {
        "from": "reviews",
        "localField": "user._id",
        "foreignField": "user_id",
        "as": "user.reviews",
    }},
    {"$addFields": {
        "_id": "$review_id",
        "user.review_count": {"$size": "$user.reviews"},
        "user.upvote_count": {"$sum": {"$map": {
            "input": "$user.reviews",
            "in": {"$size": {"$ifNull": ["$$this.votes.upvotes", []]}},
        }}},
        "user.downvote_count": {"$sum": {"$map": {
            "input": "$user.reviews",
            "in": {"$size": {"$ifNull": ["$$this.votes.downvotes", []]}},
        }}},
        # 1 = requesting user upvoted, -1 = downvoted, 0 = no vote.
        # $in requires an array operand, hence the empty-array fallback.
        "vote_state": {"$switch": {
            "branches": [
                {"case": {"$in": [user_id, {"$ifNull": ["$votes.upvotes", []]}]},
                 "then": 1},
                {"case": {"$in": [user_id, {"$ifNull": ["$votes.downvotes", []]}]},
                 "then": -1},
            ],
            "default": 0,
        }},
    }},
    # Strip credentials and the helper fields that were only needed above.
    {"$project": {
        "user.password": 0,
        "user.email": 0,
        "user_id": 0,
        "review_id": 0,
        "votes": 0,
        "user.reviews": 0,
    }},
    # Newest reviews first in the final result.
    {"$sort": {"created": -1}},
]

注意:用户可以对同一个page_id进行多次评论,但只会显示最新的

顺便说一句,我正在使用 pymongo,这就是运算符(operator)带有引号的原因

我的问题是:

  1. 是否有优化我的聚合管道的空间?

  2. 执行多个小聚合来获取上述数据是否被认为是一种好的做法,还是用 1 个大聚合(或尽可能少的聚合)来获取我想要的数据总是更好?

  3. 如您所见,每次我想访问 reviews 集合文档中的 votes.upvotes 和 votes.downvotes 时,都会先检查该字段是否为空。这是因为 votes.upvotes 和 votes.downvotes 字段不是在用户发表评论时创建的,而是在有用户对该评论投票时才创建的。我是否应该在用户发表评论时就创建空的 votes.upvotes 和 votes.downvotes 字段,并去掉 $ifNull? 这会提高聚合的性能吗?

谢谢

检查此聚合是否具有更好的性能。

如果您还没有,请创建这些索引:

# Ascending single-field index on reviews.page_id; intended to support the
# page_id $match that opens the aggregation (presumably run once at setup).
db.reviews.create_index([("page_id", 1)])

注意:如果能避免再次对 reviews 集合执行 $lookup,还可以进一步提高性能


// Reviews of one page, reduced to the newest review per reviewer, each enriched
// with the reviewer's profile, the reviewer's totals over all of their reviews,
// and the requesting user's vote state.
// `page_id` and `user_id` (the requesting user) must be defined by the caller.
db.reviews.aggregate([
  // Filter first so only this page's reviews flow through the pipeline.
  { $match: { page_id: page_id } },
  // Newest first, so that element 0 of each group's `data` array (filled by
  // $push in input order) is the reviewer's latest review.
  { $sort: { created: -1 } },
  // Carry the requesting user's id on every document so later stages can read it.
  { $addFields: { request_user_id: user_id } },
  // One group per reviewer; keep all of that reviewer's reviews of this page.
  {
    $group: {
      _id: {
        page_id: "$page_id",
        user_id: "$user_id",
        request_user_id: "$request_user_id"
      },
      data: { $push: "$$ROOT" }
    }
  },
  // Fetch the reviewer, reduced to { _id, name, review_count, upvote_count, downvote_count }.
  {
    $lookup: {
      from: "users",
      let: { root_user_id: "$_id.user_id" },
      pipeline: [
        { $match: { $expr: { $eq: ["$$root_user_id", "$_id"] } } },
        // Totals over every review written by this user.
        {
          $lookup: {
            from: "reviews",
            let: { root_user_id: "$$root_user_id" },
            pipeline: [
              { $match: { $expr: { $eq: ["$$root_user_id", "$user_id"] } } },
              {
                $group: {
                  _id: null,
                  // Every document reaching this stage already belongs to the user.
                  review_count: { $sum: 1 },
                  // The vote arrays may be absent until someone votes; $size
                  // errors on a missing operand, so fall back to an empty array.
                  upvote_count: {
                    $sum: { $size: { $ifNull: ["$votes.upvotes", []] } }
                  },
                  downvote_count: {
                    $sum: { $size: { $ifNull: ["$votes.downvotes", []] } }
                  }
                }
              },
              { $unset: "_id" }
            ],
            as: "stats"
          }
        },
        {
          $project: {
            tmp: {
              $mergeObjects: [
                { _id: "$_id", name: "$name" },
                { $arrayElemAt: ["$stats", 0] }
              ]
            }
          }
        },
        { $replaceWith: "$tmp" }
      ],
      as: "user"
    }
  },
  // Build the output document under `first`: group fields, overlaid by the
  // newest review, overlaid by the computed fields.
  {
    $addFields: {
      first: {
        $mergeObjects: [
          "$$ROOT",
          { $arrayElemAt: ["$data", 0] },
          {
            user: { $arrayElemAt: ["$user", 0] },
            // `created` is converted via $toLong then $toDate.
            created: {
              $toDate: { $toLong: { $arrayElemAt: ["$data.created", 0] } }
            },
            // Vote totals over all of this reviewer's reviews of the page.
            downvote_count: {
              $reduce: {
                input: "$data.votes.downvotes",
                initialValue: 0,
                in: { $add: ["$$value", { $size: "$$this" }] }
              }
            },
            upvote_count: {
              $reduce: {
                input: "$data.votes.upvotes",
                initialValue: 0,
                in: { $add: ["$$value", { $size: "$$this" }] }
              }
            },
            // 1 = requesting user upvoted, -1 = downvoted, 0 = no vote.
            vote_state: {
              $cond: [
                {
                  $gt: [
                    {
                      $size: {
                        $filter: {
                          input: "$data.votes.upvotes",
                          cond: { $in: ["$_id.request_user_id", "$$this"] }
                        }
                      }
                    },
                    0
                  ]
                },
                1,
                {
                  $cond: [
                    {
                      $gt: [
                        {
                          $size: {
                            $filter: {
                              input: "$data.votes.downvotes",
                              cond: { $in: ["$_id.request_user_id", "$$this"] }
                            }
                          }
                        },
                        0
                      ]
                    },
                    -1,
                    0
                  ]
                }
              ]
            }
          }
        ]
      }
    }
  },
  // Drop the helper fields, then promote `first` to be the whole document.
  {
    $unset: [
      "first.data",
      "first.votes",
      "first.user_id",
      "first.request_user_id"
    ]
  },
  { $replaceWith: "$first" },
  // Newest reviews first in the final result.
  { $sort: { created: -1 } }
])

MongoPlayground(在线演示)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM