簡體   English   中英

Elasticsearch 嵌套子聚合

[英]elastic search nested sub aggregations

我們正在使用 Elasticsearch,它將記錄保存為具有以下定義的文檔

{
"loadtender": {
    "aliases": {},
    "mappings": {
        "_doc": {
            "_meta": {
                "version": 20
            },
            "properties": {
                "carrierId": {
                    "type": "long"
                },
                "destinationData": {
                    "type": "keyword"
                },
                "destinationZip": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 50
                        }
                    }
                },
                "effStartTime": {
                    "type": "date"
                },
                "endTime": {
                    "type": "date"
                },
                "id": {
                    "type": "long"
                },
                "mustRespondByTime": {
                    "type": "date"
                },
                "orgdiv": {
                    "type": "keyword"
                },
                "originData": {
                    "type": "keyword"
                },
                "originZip": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 50
                        }
                    }
                },
                "purchaseOrderNum": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 255
                        }
                    }
                },
                "startTime": {
                    "type": "date"
                },
                "tenderStatus": {
                    "type": "keyword"
                },
                "tenderedTime": {
                    "type": "date"
                }
            }
        }
    },
    "settings": {
        "index": {
            "creation_date": "1655105542470",
            "number_of_shards": "5",
            "number_of_replicas": "1",
            "uuid": "ohcXgA8EQ5iJj0X6_4BqXA",
            "version": {
                "created": "6080499"
            },
            "provided_name": "loadtender"
        }
    }
}

}

我正在嘗試搜索記錄,希望在過濾後返回以下結果

輸入參數:startDate(昨天)、originData.originCity 和 originData.destinationCity

需要的輸出:

  • 三個桶:0-30 天、30-60 天、60-90 天
  • 在上述每個下的不同 originData.city 和 destinationData.city 組合的桶
  • 在上面的每一個下,每個唯一的carrierId的數據桶和相應的記錄列表/計數

基本上我試圖實現類似下面的東西

{
"aggregations": {
    "aggr": {
        "buckets": [
            {
                "key": "0-30 days",
                "doc_count": 10,
                "aggr": {
                    "buckets": [
                        {
                            "key": "(originCity)Menasha, WI, US|Hanover, MD, US (DestinationCity)",
                            "aggr": {
                                "buckets": [
                                    {
                                        "key": "10183-carrierId",
                                        "count": 10
                                    }
                                ]
                            }
                        }
                    ]
                }
            },
            {
                "key": "30-60 days",
                "doc_count": 11,
                "aggr": {
                    "buckets": [
                        {
                            "key": "Dallas, TX, US|Houston, TX, US",
                            "aggr": {
                                "buckets": [
                                    {
                                        "key": "10183-carrierId",
                                        "count": 10
                                    },
                                    {
                                        "key": "10022-carrierId",
                                        "count": 1
                                    }
                                ]
                            }
                        }
                    ]
                }
            }
        ]
    }
}

}

我嘗試了以下方法,但始終沒有找到使用子聚合進一步過濾結果的方法。

{
"_source":["id", "effStartTime", "carrierId", "originData", "destinationData"],
"size": 100,
"query": {
    "bool": {
        "must": [
            {
            "bool": {
                "must": [
                    {
                        "range": {
                        "startTime": {
                            "from": "2021-08-27T23:59:59.000Z",
                            "to": "2022-09-01T00:00:00.000Z",
                            "include_lower": true,
                            "include_upper": true,
                            "boost": 1
                        }
                        }
                    }
                ],
                "adjust_pure_negative": true,
                "boost": 1
            }
            }
        ],
        "must_not": [
            {
                "term": {
                    "tenderStatus": {
                    "value": "REMOVED",
                    "boost": 1
                    }
                }
            }
        ],
        "filter" : {
            "exists" : {
            "field" : "carrierId"
            }
        },
        "adjust_pure_negative": true,
        "boost": 1
    }
},
"aggregations": {
    "aggr": {
        "terms": {
            "script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]"
        }
    }

} }

我開始懷疑這是否可行,或者我是否應該改為針對同一個問題發出多個查詢。

我能夠使用以下子聚合來實現相同的目標:

"aggregations": {
    "aggr":{
        "date_range": {
            "field": "startTime",
            "format": "MM-yyyy",
            "ranges": [
                {"to": "now-1M/M", "from": "now"}, --> now to 30 days back
                {"to": "now-1M/M", "from": "now-2M/M"}, --> from 30 days back to 60 days back
                {"to": "now-2M/M", "from": "now-3M/M"}, --> from 60 days back to 90 days back
                {"to": "now-3M/M", "from": "now-12M/M"}
            ]
        },
        "aggregations": {
                "aggr":{
                    "terms": {
                        "script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]" --> concatenated origin and destination address as a key
                    },
                    "aggregations": {
                        "aggr": {
                            "terms": {
                                "field": "carrierId" --> nested carrier count
                            }
                        }
                    }
                }
        }
    }

}

以下是我收到的響應模板。

"aggregations": {
"aggr": {
  "buckets": [
    {
      "key": "09-2021-06-2022",
      "from": 1630454400000,
      "from_as_string": "09-2021",
      "to": 1654041600000,
      "to_as_string": "06-2022",
      "doc_count": 1,
      "aggr": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "Dallas, TX, US|Houston, TX, US",
            "doc_count": 14,
            "aggr": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": 10022,
                  "doc_count": 14
                }
              ]
            }
          }
        ]
      }
    }
  ]
}

}

感謝大家的努力和時間。如果您發現任何更好的方法,請告訴我。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM