简体   繁体   中英

Elasticsearch nested sub-aggregations

We are using Elasticsearch, which holds our records as documents in an index with the following definition:

{
"loadtender": {
    "aliases": {},
    "mappings": {
        "_doc": {
            "_meta": {
                "version": 20
            },
            "properties": {
                "carrierId": {
                    "type": "long"
                },
                "destinationData": {
                    "type": "keyword"
                },
                "destinationZip": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 50
                        }
                    }
                },
                "effStartTime": {
                    "type": "date"
                },
                "endTime": {
                    "type": "date"
                },
                "id": {
                    "type": "long"
                },
                "mustRespondByTime": {
                    "type": "date"
                },
                "orgdiv": {
                    "type": "keyword"
                },
                "originData": {
                    "type": "keyword"
                },
                "originZip": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 50
                        }
                    }
                },
                "purchaseOrderNum": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 255
                        }
                    }
                },
                "startTime": {
                    "type": "date"
                },
                "tenderStatus": {
                    "type": "keyword"
                },
                "tenderedTime": {
                    "type": "date"
                }
            }
        }
    },
    "settings": {
        "index": {
            "creation_date": "1655105542470",
            "number_of_shards": "5",
            "number_of_replicas": "1",
            "uuid": "ohcXgA8EQ5iJj0X6_4BqXA",
            "version": {
                "created": "6080499"
            },
            "provided_name": "loadtender"
        }
    }
}

}

I am trying to search the records so that the following filtered results are returned:

Input parameters: startDate (yesterday), originData.originCity and destinationData.destinationCity

Output Required:

  • Three buckets for 0-30 days, 30-60 days and 60-90 days
  • buckets of distinct originData.city and destinationData.city combinations under each of the above
  • Under each of the above, buckets of data for each unique carrierId and the corresponding record list / count

Basically I was trying to achieve something like the below

{
"aggregations": {
    "aggr": {
        "buckets": [
            {
                "key": "0-30 days",
                "doc_count": 10,
                "aggr": {
                    "buckets": [
                        {
                            "key": "(originCity)Menasha, WI, US|Hanover, MD, US (DestinationCity)",
                            "aggr": {
                                "buckets": [
                                    {
                                        "key": "10183-carrierId",
                                        "count": 10
                                    }
                                ]
                            }
                        }
                    ]
                }
            },
            {
                "key": "30-60 days",
                "doc_count": 11,
                "aggr": {
                    "buckets": [
                        {
                            "key": "Dallas, TX, US|Houston, TX, US",
                            "aggr": {
                                "buckets": [
                                    {
                                        "key": "10183-carrierId",
                                        "count": 10
                                    },
                                    {
                                        "key": "10022-carrierId",
                                        "count": 1
                                    }
                                ]
                            }
                        }
                    ]
                }
            }
        ]
    }
}

}

I've tried the following, but I can't find a way to break the results down further using sub-aggregations.

{
"_source":["id", "effStartTime", "carrierId", "originData", "destinationData"],
"size": 100,
"query": {
    "bool": {
        "must": [
            {
            "bool": {
                "must": [
                    {
                        "range": {
                        "startTime": {
                            "from": "2021-08-27T23:59:59.000Z",
                            "to": "2022-09-01T00:00:00.000Z",
                            "include_lower": true,
                            "include_upper": true,
                            "boost": 1
                        }
                        }
                    }
                ],
                "adjust_pure_negative": true,
                "boost": 1
            }
            }
        ],
        "must_not": [
            {
                "term": {
                    "tenderStatus": {
                    "value": "REMOVED",
                    "boost": 1
                    }
                }
            }
        ],
        "filter" : {
            "exists" : {
            "field" : "carrierId"
            }
        },
        "adjust_pure_negative": true,
        "boost": 1
    }
},
"aggregations": {
    "aggr": {
        "terms": {
            "script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]"
        }
    }

} }

I am beginning to wonder whether this is even possible, or whether I should switch to issuing multiple queries instead.

I was able to achieve the same using the following sub-aggregations:

"aggregations": {
    "aggr":{
        "date_range": {
            "field": "startTime",
            "format": "MM-yyyy",
            "ranges": [
                {"to": "now", "from": "now-1M/M"}, --> from 30 days back to now ("from" is the lower bound, "to" the upper bound)
                {"to": "now-1M/M", "from": "now-2M/M"}, --> from 30 days back to 60 days back
                {"to": "now-2M/M", "from": "now-3M/M"}, --> from 60 days back to 90 days back
                {"to": "now-3M/M", "from": "now-12M/M"}
            ]
        },
        "aggregations": {
                "aggr":{
                    "terms": {
                        "script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]" --> concatenated origin and destination address as a key
                    },
                    "aggregations": {
                        "aggr": {
                            "terms": {
                                "field": "carrierId" --> nested carrier count
                            }
                        }
                    }
                }
        }
    }

}

Following is the response template that I receive.

"aggregations": {
"aggr": {
  "buckets": [
    {
      "key": "09-2021-06-2022",
      "from": 1630454400000,
      "from_as_string": "09-2021",
      "to": 1654041600000,
      "to_as_string": "06-2022",
      "doc_count": 1,
      "aggr": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "Dallas, TX, US|Houston, TX, US",
            "doc_count": 14,
            "aggr": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": 10022,
                  "doc_count": 14
                }
              ]
            }
          }
        ]
      }
    }
  ]
}

}

Thank you to all of you for your efforts and time. Do let me know if you discover any better way.

The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM