[英]MongoDB Aggregation - SUM WHERE
我在mongoDB中有一个类似于下面的大型数据集
我想在MongoDB中运行一个聚合,该聚合等效于以下SQL:
-- Total of the cores column over the rows that satisfy all three filters.
SELECT SUM(cores)
FROM machines
WHERE idc = 'AMS'
  AND cluster = '1'
  AND type = 'Physical';
我该如何在MongoDB中进行操作?
[
{
"_id" : "55d5dc40281077b6d8af1bfa",
"hostname" : "x",
"domain" : "domain",
"description" : "VMWare ESXi 5",
"cluster" : "1",
"type" : "Physical",
"os" : "EXSi",
"idc" : "AMS",
"environment" : "DR",
"deviceclass" : "host",
"cores" : "64",
"memory" : "256",
"mounts" : [ ],
"roles" : [
"ESX-HOST"
],
"ipset" : {
"backnet" : "1"
},
"frontnet" : [ ],
"created" : "2015-09-08T07:35:03.343Z"
},
{
"_id" : "55d5dc40281077b6d8af1bfb",
"hostname" : "x",
"domain" : "domain",
"description" : "VMWare ESXi 5",
"cluster" : "1",
"type" : "Physical",
"os" : "EXSi",
"idc" : "AMS",
"environment" : "DR",
"deviceclass" : "host",
"cores" : "64",
"memory" : "256",
"mounts" : [ ],
"roles" : [
"ESX-HOST"
],
"ipset" : {
"backnet" : "1"
},
"frontnet" : [ ],
"created" : "2015-09-08T07:35:03.346Z"
}
]
首先,您需要更新文档,因为 `cores` 的值是字符串而不是数字。为此,我们使用“批量”(Bulk)操作。
// Convert every string-typed "cores" value in db.machines to an integer,
// sending the updates to the server in batches of BATCH_SIZE operations.
var BATCH_SIZE = 200;
var bulk = db.machines.initializeOrderedBulkOp();
var count = 0; // number of update operations queued so far

// BSON type 2 is "string": only documents whose cores is still a string are visited.
db.machines.find({ "cores": { "$type": 2 } }).forEach(function (doc) {
    // Always pass the radix so the string is parsed as base 10.
    var cores = parseInt(doc.cores, 10);
    if (isNaN(cores)) {
        // Leave non-numeric strings untouched instead of overwriting them with NaN.
        return;
    }
    bulk.find({ "_id": doc._id }).updateOne({ "$set": { "cores": cores } });
    count++;
    if (count % BATCH_SIZE === 0) {
        // Flush the full batch and start a fresh one.
        bulk.execute();
        bulk = db.machines.initializeOrderedBulkOp();
    }
});

// Flush whatever is left in the last, partially filled batch.
// Executing an empty bulk throws, hence the guard.
if (count % BATCH_SIZE !== 0) {
    bulk.execute();
}
然后,使用聚合框架,我们可以得出 `cores` 的总和。首先,我们需要使用 `$match` 运算符过滤文档;然后在 `$group` 阶段,我们使用 `$sum` 运算符获取 `cores` 值的总和。
// Stage 1: keep only the documents that satisfy all three equality filters.
var matchStage = {
    "$match": { "idc": "AMS", "cluster": "1", "type": "Physical" }
};
// Stage 2: put every remaining document in one group (_id: null) and add up "cores".
var groupStage = {
    "$group": { "_id": null, "sum_cores": { "$sum": "$cores" } }
};
db.machines.aggregate([matchStage, groupStage])
返回结果如下:
{ "_id" : null, "sum_cores" : 128 }
虽然我还没有执行它来测试它,但请检查以下内容:
// Sum "cores" over the documents of db.machines that match all three filters.
db.machines.aggregate([
    // "cluster" is stored as a string in the sample documents, so match "1", not 1.
    { $match: { idc: 'AMS', cluster: '1', type: 'Physical' } },
    // "cores" is stored as a string and $sum ignores non-numeric values, so convert
    // it first. $toInt requires MongoDB 4.0+ and also accepts values that are
    // already numeric.
    { $group: { _id: null, sum: { $sum: { $toInt: '$cores' } } } },
    // Return only the total.
    { $project: { _id: 0, sum: 1 } }
])
我认为仅使用聚合框架是做不到的,因为 `cores` 被保存为 string,而当前的 MongoDB 不允许在 `$project` 管道阶段把 string 转换为 number(注:MongoDB 4.0 及以上版本已提供 `$toInt` / `$convert`,可以在聚合管道中完成这种转换)。使用简单的 JavaScript 也可以得到相同的结果:
// Sum "cores" on the client: fetch only the cores field of the matching
// documents in db.machines and add the values up after parsing them.
var tmp = db.machines.find(
    { idc: 'AMS', cluster: '1', type: 'Physical' },
    { _id: 0, cores: 1 }
);
var totalCores = 0;
tmp.forEach(function (doc) {
    // Always pass the radix so the string is parsed as base 10.
    var cores = parseInt(doc.cores, 10);
    // Skip missing or non-numeric values so one bad document cannot turn the total into NaN.
    if (!isNaN(cores)) {
        totalCores += cores;
    }
});
print(totalCores);
如果我正确理解问题。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.