简体   繁体   中英

How to Group and get Count using morphia in mongoDB?

I'm using Morphia with MongoDB in Java, and i have collection like this

{"_id":"5d5e7ce7869eef030869e85c",
    "ip":"66.249.79.181",
    "date":"2019-08-19T18:30:00.000Z",
    "request_url":"https://www.example.com/home", 
    "status_code":"200", 
    "bot":"Google Android",
    "type":"type/html",
    "domain":"https://www.example.com"},

{"_id":"5d5e7ce7869eef030869e85c",
    "ip":"66.249.79.181",
    "date":"2019-08-19T18:30:00.000Z",
    "request_url":"https://www.example.com/home", 
    "status_code":"200", 
    "bot":"Google",
    "type":"type/html",
    "domain":"https://www.example.com"},

{"_id":"5d5e7ce7869eef030869e85c",
    "ip":"66.249.79.181",
    "date":"2019-08-19T18:30:00.000Z",
    "request_url":"https://www.example.com/home", 
    "status_code":"200", 
    "bot":"bing",
    "type":"type/html",
    "domain":"https://www.example.com"}

i need to using grouping("request_url") and get count of "bot" field what if I need result like this

{"request_url":"https://www.example.com/home",
 "status_code":"200",
 "Google": 1,
 "Google Android": 1,
 "bing": 1,
 "type":"type/html", }

How can i do this group by "request_url" Field and get Count of each "bot" Field have

Using aggregate below as:

db.collection.aggregate([
 {
    $group: {
      _id: {
        request_url: "$request_url",
        bot: "$bot"
      },
      type: {
        $max: "$type"
      },
      status_code: {
        $max: "$status_code"
      },
      count: {
        $sum: 1
      }
    }
  },
  {
    $group: {
      _id: "$_id.request_url",
      type: {
        $max: "$type"
      },
      status_code: {
        $max: "$status_code"
      },
      counts: {
        $push: {
          bot: "$_id.bot",
          count: "$count"
        }
      }
    }
  }
])

Giving input:

[
  {
    "ip": "66.249.79.181",
    "date": "2019-08-19T18:30:00.000Z",
    "request_url": "https://www.example.com/home",
    "status_code": "200",
    "bot": "Google Android",
    "type": "type/html",
    "domain": "https://www.example.com"
  },
  {
    "ip": "66.249.79.181",
    "date": "2019-08-19T18:30:00.000Z",
    "request_url": "https://www.example.com/home",
    "status_code": "200",
    "bot": "Google",
    "type": "type/html",
    "domain": "https://www.example.com"
  },
  {
    "ip": "66.249.79.181",
    "date": "2019-08-19T18:30:00.000Z",
    "request_url": "https://www.example.com/home",
    "status_code": "200",
    "bot": "bing",
    "type": "type/html",
    "domain": "https://www.example.com"
  }
]

And output being:

[
  {
    "_id": "https://www.example.com/home",
    "counts": [
      {
        "bot": "bing",
        "count": 1
      },
      {
        "bot": "Google",
        "count": 1
      },
      {
        "bot": "Google Android",
        "count": 1
      }
    ],
    "status_code": "200",
    "type": "type/html"
  }
]

The following query can get us the expected output:

db.collection.aggregate([
    {
        $group:{
            "_id":{
                "request_url":"$request_url",
                "bot":"$bot"
            },
            "request_url":{
                $first:"$request_url"
            },
            "k":{
                $first:"$bot"
            },
            "v":{
                $sum:1
            }
        }
    },
    {
        $group:{
            "_id":"$request_url",
            "request_url":{
                $first:"$request_url"
            },
            "bots":{
                $push:{
                    "k":"$k",
                    "v":"$v"
                }
            }
        }
    },
    {
        $project:{
            "info.request_url":"$request_url",
            "bots":{
                $arrayToObject:"$bots"
            }
        }
    },
    {
        $project:{
            "info":{
                $mergeObjects:["$info","$bots"]
            }
        }
    },
    {
        $replaceRoot:{
            newRoot:"$info"
        }
    }
]).pretty()

Data set:

{
    "_id" : ObjectId("5d6d0f456bc2ad3b23f7dfcf"),
    "ip" : "66.249.79.181",
    "date" : "2019-08-19T18:30:00.000Z",
    "request_url" : "https://www.example.com/home",
    "status_code" : "200",
    "bot" : "Google Android",
    "type" : "type/html",
    "domain" : "https://www.example.com"
}
{
    "_id" : ObjectId("5d6d0f456bc2ad3b23f7dfd0"),
    "ip" : "66.249.79.181",
    "date" : "2019-08-19T18:30:00.000Z",
    "request_url" : "https://www.example.com/home",
    "status_code" : "200",
    "bot" : "Google",
    "type" : "type/html",
    "domain" : "https://www.example.com"
}
{
    "_id" : ObjectId("5d6d0f456bc2ad3b23f7dfd1"),
    "ip" : "66.249.79.181",
    "date" : "2019-08-19T18:30:00.000Z",
    "request_url" : "https://www.example.com/home",
    "status_code" : "200",
    "bot" : "bing",
    "type" : "type/html",
    "domain" : "https://www.example.com"
}

Output:

{
    "request_url" : "https://www.example.com/home",
    "bing" : 1,
    "Google" : 1,
    "Google Android" : 1
}

Explanation: The data is grouped on the request_url and distinct key-value pairs are calculated. The key(k) would hold the bot name and the value(v) would hold the occurrence count. Later on, each pair is pushed into an array and then the array is converted into an object.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM