[英]Aggregations in Elasticsearch
I have an Elasticsearch query that returns a bunch of objects that look like this: 我有一个Elasticsearch查询,它返回一堆看起来像这样的对象:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "searchdb",
"_type": "profile",
"_id": "1825",
"_score": 1,
"_source": {
"id": 1825,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"hourly_values": [
{
"datetime": "1997-07-16T19:00:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
}
]
}
},
{
"_index": "searchdb",
"_type": "profile",
"_id": "1808",
"_score": 1,
"_source": {
"id": 1808,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"hourly_values": [
{
"datetime": "1997-07-16T19:00:00.00+00:00",
"seconds": 900
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 800
}
]
}
}
]
}
}
I want to return the same result, but with an aggregation of the seconds fields for each object returned. 我想返回相同的结果,但返回的每个对象的秒字段汇总在一起。
My query right now looks like this: 我的查询现在看起来像这样:
{
"query": {
"filtered":{
"filter":{
"geo_distance":{
"distance":"1km",
"geo_location":{
"lat":"41.1234",
"lon":"-87.5678"
}
}
}
}
},
"aggregations": {
"seconds_sum": {
"sum": {
"field": "hourly_values.seconds"
}
}
}
}
The above just aggregates all the seconds for all objects together. 上面只是将所有对象的所有秒汇总在一起。 I can't figure out how to aggregate just the seconds for each object, and return that aggregate with the object, so I can end up with something like this: 我无法弄清楚如何仅聚合每个对象的秒数,并随该对象返回该聚合,因此我可以得到如下结果:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "searchdb",
"_type": "profile",
"_id": "1825",
"_score": 1,
"_source": {
"id": 1825,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"seconds":3600
}
},
{
"_index": "searchdb",
"_type": "profile",
"_id": "1808",
"_score": 1,
"_source": {
"id": 1808,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"seconds":2900
}
}
]
}
}
Or something like that ... 或类似的东西 ...
That's quite easy. 那很容易。 First of all, you will need to store your hourly_values as nested objects. 首先,您需要将hourly_values存储为嵌套对象。
You have to bucket the documents by a unique value using a terms aggregation (in this case it's probably going to be id), and only then apply a sum aggregation inside each bucket. 您必须先使用terms聚合按唯一值进行分桶(在这种情况下,它很可能是id),然后才能在每个桶内使用sum聚合求和。 To sum things up: 总结一下:
PUT /test
{
"mappings": {
"data": {
"properties": {
"id": {
"type": "integer"
},
"geo_location": {
"type": "geo_point"
},
"hourly_values": {
"type": "nested",
"properties": {
"datetime": {
"type": "date"
},
"seconds": {
"type": "integer"
}
}
}
}
}
}
}
PUT /test/data/1
{
"id": 1825,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"hourly_values": [
{
"datetime": "1997-07-16T19:00:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
}
]
}
PUT /test/data/2
{
"id": 1808,
"market": "Chicago",
"geo_location": {
"lat": 41.1234,
"lon": -87.5678
},
"hourly_values": [
{
"datetime": "1997-07-16T19:00:00.00+00:00",
"seconds": 900
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 1200
},
{
"datetime": "1997-07-16T19:20:00.00+00:00",
"seconds": 800
}
]
}
POST /test/_search
{
"size": 0,
"aggs": {
"Ids": {
"terms": {
"field": "id",
"size": 0
},
"aggs": {
"Nesting": {
"nested": {
"path": "hourly_values"
},
"aggs": {
"SumSeconds": {
"sum": {
"field": "hourly_values.seconds"
}
}
}
}
}
}
}
}
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"Ids": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1808,
"doc_count": 1,
"Nesting": {
"doc_count": 3,
"SumSeconds": {
"value": 2900
}
}
},
{
"key": 1825,
"doc_count": 1,
"Nesting": {
"doc_count": 3,
"SumSeconds": {
"value": 3600
}
}
}
]
}
}
}
If you'd like to return the documents alongside the sums too, you could use the Top Hits aggregation together with the nested sum: 如果您也想将文档与求和结果一起返回,则可以将Top Hits聚合与嵌套求和一起使用:
POST /test/_search
{
"size": 0,
"aggs": {
"Ids": {
"terms": {
"field": "id",
"size": 0
},
"aggs": {
"Objects": {
"top_hits": {
"_source": ["id", "market", "geo_location"],
"size": 1
}
},
"Nesting": {
"nested": {
"path": "hourly_values"
},
"aggs": {
"SumSeconds": {
"sum": {
"field": "hourly_values.seconds"
}
}
}
}
}
}
}
}
And this would return the following: 这会返回如下结果:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"Ids": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1808,
"doc_count": 1,
"Nesting": {
"doc_count": 3,
"SumSeconds": {
"value": 2900
}
},
"Objects": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "data",
"_id": "2",
"_score": 1,
"_source": {
"market": "Chicago",
"geo_location": {
"lon": -87.5678,
"lat": 41.1234
},
"id": 1808
}
}
]
}
}
},
{
"key": 1825,
"doc_count": 1,
"Nesting": {
"doc_count": 3,
"SumSeconds": {
"value": 3600
}
},
"Objects": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "data",
"_id": "1",
"_score": 1,
"_source": {
"market": "Chicago",
"geo_location": {
"lon": -87.5678,
"lat": 41.1234
},
"id": 1825
}
}
]
}
}
}
]
}
}
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.