We're attempting to migrate from MongoDB to Cosmos DB using the Mongo API. In performance testing, we've noticed that a simple count operation on a collection results in extremely high RU usage. After removing the $group stage, the RU usage drops from 8,586.56 to 11.39. Both executions also used the same index in the $match stage of the operation.
We would like to know why there's such a huge difference. Is this a performance problem with Cosmos DB?
In the code, we're simply executing collection.countDocuments(whereQuery),
which the Mongo API converts to:
{ 'aggregate' : 'NewOffers', 'pipeline' : [{ '$match' : { 'statusCode' : { '$in' : [0.0] } } }, { '$group' : { '_id' : 1.0, 'n' : { '$sum' : 1.0 } } }] }
Query stats with the $group stage:
{
"command" : "db.runCommand({explain: { 'aggregate' : 'NewOffers', 'pipeline' : [{ '$match' : { 'statusCode' : { '$in' : [0.0] } } }, { '$group' : { '_id' : 1.0, 'n' : { '$sum' : 1.0 } } }], 'explain' : true, 'cursor' : { }, 'lsid' : { 'id' : CSUUID('9714164a-f211-4a8f-a885-4457f9edf257') }, '$readPreference' : { 'mode' : 'secondaryPreferred' }, '$db' : 'myDb' }})",
"stages" : [
{
"stage" : "$aggregateQuery",
"timeInclusiveMS" : 361.1143,
"timeExclusiveMS" : 0.0613,
"in" : 1,
"out" : 1,
"dependency" : {
"getNextPageCount" : 1,
"count" : 2,
"time" : 361.053,
"bytes" : NumberLong(101)
},
"details" : {
"database" : "myDb",
"collection" : "NewOffers",
"query" : {
"statusCode" : {
"$in" : [
0.0
]
}
},
"indexUsage" : {
"pathsIndexed" : {
"individualIndexes" : [
"statusCode"
],
"compoundIndexes" : []
},
"pathsNotIndexed" : {
"individualIndexes" : [],
"compoundIndexes" : []
}
},
"aggregate" : {
"_id" : 1.0,
"n" : {
"$sum" : 1.0
}
},
"shardInformation" : [
{
"activityId" : "eadabacc-383a-4cd7-b5ce-6abe4f5ce577",
"shardKeyRangeId" : "[,0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF) move next",
"durationMS" : 333.0191,
"preemptions" : 0,
"outputDocumentCount" : 1,
"retrievedDocumentCount" : 35590
},
{
"activityId" : "703bef9a-62a4-4fc7-bcbc-85e7a35b3741",
"shardKeyRangeId" : "[0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE) move next",
"durationMS" : 333.8226,
"preemptions" : 0,
"outputDocumentCount" : 1,
"retrievedDocumentCount" : 35660
},
{
"activityId" : "a182b7ba-0565-4461-8d45-ac6f45a84d4f",
"shardKeyRangeId" : "[1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE,2FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD) move next",
"durationMS" : 343.3216,
"preemptions" : 0,
"outputDocumentCount" : 1,
"retrievedDocumentCount" : 35741
},
{
"activityId" : "4ddd60d5-d3c9-44d4-bbb8-397b8c51bcfb",
"shardKeyRangeId" : "[2FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD,FF) move next",
"durationMS" : 347.9444,
"preemptions" : 0,
"outputDocumentCount" : 1,
"retrievedDocumentCount" : 35215
}
],
"queryMetrics" : {
"retrievedDocumentCount" : 142206,
"retrievedDocumentSizeBytes" : 380079234,
"outputDocumentCount" : 4,
"outputDocumentSizeBytes" : 496,
"indexHitRatio" : 0.0,
"totalQueryExecutionTimeMS" : 1317.74,
"queryPreparationTimes" : {
"queryCompilationTimeMS" : 0.86,
"logicalPlanBuildTimeMS" : 0.21,
"physicalPlanBuildTimeMS" : 0.47,
"queryOptimizationTimeMS" : 0
},
"indexLookupTimeMS" : 0.59,
"documentLoadTimeMS" : 802.01,
"vmExecutionTimeMS" : 1315.04,
"runtimeExecutionTimes" : {
"queryEngineExecutionTimeMS" : 512.4,
"systemFunctionExecutionTimeMS" : 101.12,
"userDefinedFunctionExecutionTimeMS" : 0
},
"documentWriteTimeMS" : 0.04
}
}
}
],
"estimatedDelayFromRateLimitingInMilliseconds" : 0.0,
"retriedDueToRateLimiting" : false,
"totalRequestCharge" : 8586.56,
"continuation" : {
"hasMore" : false
},
"ok" : 1.0
}
Execution stats after removing the $group stage:
{
"command" : "db.runCommand({explain: { 'aggregate' : 'NewOffers', 'pipeline' : [{ '$match' : { 'statusCode' : { '$in' : [0.0] } } }], 'explain' : true, 'cursor' : { }, 'lsid' : { 'id' : CSUUID('9714164a-f211-4a8f-a885-4457f9edf257') }, '$readPreference' : { 'mode' : 'secondaryPreferred' }, '$db' : 'myDb' }})",
"stages" : [
{
"stage" : "$query",
"timeInclusiveMS" : 76.5114,
"timeExclusiveMS" : 76.5114,
"in" : 101,
"out" : 101,
"dependency" : {
"getNextPageCount" : 1,
"count" : 1,
"time" : 0.0,
"bytes" : NumberLong(185745)
},
"details" : {
"database" : "myDb",
"collection" : "NewOffers",
"query" : {
"statusCode" : {
"$in" : [
0.0
]
}
},
"indexUsage" : {
"pathsIndexed" : {
"individualIndexes" : [
"statusCode"
],
"compoundIndexes" : []
},
"pathsNotIndexed" : {
"individualIndexes" : [],
"compoundIndexes" : []
}
},
"shardInformation" : [
{
"activityId" : "930a2e5b-68aa-49f7-b04b-6e7607d917db",
"shardKeyRangeId" : "[,0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF) move next",
"durationMS" : 29.7279,
"preemptions" : 0,
"outputDocumentCount" : 101,
"retrievedDocumentCount" : 101
},
{
"activityId" : "5697fd05-f12f-4bb4-8c0d-377debe1f890",
"shardKeyRangeId" : "[0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF,1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE) move next",
"durationMS" : 7.4769,
"preemptions" : 0,
"outputDocumentCount" : 101,
"retrievedDocumentCount" : 101
},
{
"activityId" : "6a2733b2-1b25-4098-a50e-f6e53ac09398",
"shardKeyRangeId" : "[1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE,2FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD) move next",
"durationMS" : 28.7704,
"preemptions" : 0,
"outputDocumentCount" : 101,
"retrievedDocumentCount" : 101
},
{
"activityId" : "34913339-b43e-473e-a5bd-fdfc84eb88ff",
"shardKeyRangeId" : "[2FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD,FF) move next",
"durationMS" : 34.9221,
"preemptions" : 0,
"outputDocumentCount" : 101,
"retrievedDocumentCount" : 101
}
],
"queryMetrics" : {
"retrievedDocumentCount" : 404,
"retrievedDocumentSizeBytes" : 1051338,
"outputDocumentCount" : 404,
"outputDocumentSizeBytes" : 706377,
"indexHitRatio" : 1.0,
"totalQueryExecutionTimeMS" : 18.86,
"queryPreparationTimes" : {
"queryCompilationTimeMS" : 0.24,
"logicalPlanBuildTimeMS" : 0.12,
"physicalPlanBuildTimeMS" : 0.43,
"queryOptimizationTimeMS" : 0
},
"indexLookupTimeMS" : 0.34,
"documentLoadTimeMS" : 2.54,
"vmExecutionTimeMS" : 16.4799,
"runtimeExecutionTimes" : {
"queryEngineExecutionTimeMS" : 0.7,
"systemFunctionExecutionTimeMS" : 0.36,
"userDefinedFunctionExecutionTimeMS" : 0
},
"documentWriteTimeMS" : 12.8999
}
}
}
],
"estimatedDelayFromRateLimitingInMilliseconds" : 0.0,
"retriedDueToRateLimiting" : false,
"totalRequestCharge" : 11.39,
"continuation" : {
"hasMore" : true,
"$comment" : "Command did not fully drain, to fully drain add explain option {\"executionStatistics\": true }."
},
"ok" : 1.0
}
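Sorry to hear about the issues you're having. The inefficiency of the collection.countDocuments() driver method is a known issue that's being fixed - as you saw from the very high retrievedDocumentCount in the explain output. Note that the two RU figures are not a like-for-like comparison: the explain without $group did not fully drain (it retrieved only 404 documents and reported hasMore: true), whereas the count retrieved all 142,206 matching documents. As a workaround, the collection.count() method will be far more efficient.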
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint this content, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.