繁体   English   中英

Elasticsearch 嵌套子聚合

[英]elastic search nested sub aggregations

我们正在使用 Elasticsearch,它将记录保存为文档,索引的映射定义如下:

{
"loadtender": {
    "aliases": {},
    "mappings": {
        "_doc": {
            "_meta": {
                "version": 20
            },
            "properties": {
                "carrierId": {
                    "type": "long"
                },
                "destinationData": {
                    "type": "keyword"
                },
                "destinationZip": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 50
                        }
                    }
                },
                "effStartTime": {
                    "type": "date"
                },
                "endTime": {
                    "type": "date"
                },
                "id": {
                    "type": "long"
                },
                "mustRespondByTime": {
                    "type": "date"
                },
                "orgdiv": {
                    "type": "keyword"
                },
                "originData": {
                    "type": "keyword"
                },
                "originZip": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 50
                        }
                    }
                },
                "purchaseOrderNum": {
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword",
                            "ignore_above": 255
                        }
                    }
                },
                "startTime": {
                    "type": "date"
                },
                "tenderStatus": {
                    "type": "keyword"
                },
                "tenderedTime": {
                    "type": "date"
                }
            }
        }
    },
    "settings": {
        "index": {
            "creation_date": "1655105542470",
            "number_of_shards": "5",
            "number_of_replicas": "1",
            "uuid": "ohcXgA8EQ5iJj0X6_4BqXA",
            "version": {
                "created": "6080499"
            },
            "provided_name": "loadtender"
        }
    }
}

}

我正在尝试搜索记录,并希望在过滤之后返回如下结果:

输入参数:startDate(昨天)、originData.originCity 和 originData.destinationCity

需要的输出:

  • 三个桶:0-30 天、30-60 天、60-90 天
  • 在上述每个下的不同 originData.city 和 destinationData.city 组合的桶
  • 在上述每个组合桶之下,再按每个唯一的 carrierId 分桶,并给出相应的记录列表/计数

基本上我试图实现类似下面的东西

{
"aggregations": {
    "aggr": {
        "buckets": [
            {
                "key": "0-30 days",
                "doc_count": 10,
                "aggr": {
                    "buckets": [
                        {
                            "key": "(originCity)Menasha, WI, US|Hanover, MD, US (DestinationCity)",
                            "aggr": {
                                "buckets": [
                                    {
                                        "key": "10183-carrierId",
                                        "count": 10
                                    }
                                ]
                            }
                        }
                    ]
                }
            },
            {
                "key": "30-60 days",
                "doc_count": 11,
                "aggr": {
                    "buckets": [
                        {
                            "key": "Dallas, TX, US|Houston, TX, US",
                            "aggr": {
                                "buckets": [
                                    {
                                        "key": "10183-carrierId",
                                        "count": 10
                                    },
                                    {
                                        "key": "10022-carrierId",
                                        "count": 1
                                    }
                                ]
                            }
                        }
                    ]
                }
            }
        ]
    }
}

}

我尝试了以下方法,但没有找到使用子聚合对结果进一步分组的办法。

{
"_source":["id", "effStartTime", "carrierId", "originData", "destinationData"],
"size": 100,
"query": {
    "bool": {
        "must": [
            {
            "bool": {
                "must": [
                    {
                        "range": {
                        "startTime": {
                            "from": "2021-08-27T23:59:59.000Z",
                            "to": "2022-09-01T00:00:00.000Z",
                            "include_lower": true,
                            "include_upper": true,
                            "boost": 1
                        }
                        }
                    }
                ],
                "adjust_pure_negative": true,
                "boost": 1
            }
            }
        ],
        "must_not": [
            {
                "term": {
                    "tenderStatus": {
                    "value": "REMOVED",
                    "boost": 1
                    }
                }
            }
        ],
        "filter" : {
            "exists" : {
            "field" : "carrierId"
            }
        },
        "adjust_pure_negative": true,
        "boost": 1
    }
},
"aggregations": {
    "aggr": {
        "terms": {
            "script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]"
        }
    }

} }

我开始怀疑这是否可行,或者我是否应该改为针对同一个问题发出多个查询。

我能够使用以下子聚合来实现相同的目标:

"aggregations": {
    "aggr":{
        "date_range": {
            "field": "startTime",
            "format": "MM-yyyy",
            "ranges": [
                {"to": "now-1M/M", "from": "now"}, --> now to 30 days back
                {"to": "now-1M/M", "from": "now-2M/M"}, from 30 days back to 60 days back
                {"to": "now-2M/M", "from": "now-3M/M"}, from 60 days back to 90 days back
                {"to": "now-3M/M", "from": "now-12M/M"}
            ]
        },
        "aggregations": {
                "aggr":{
                    "terms": {
                        "script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]" --> concatenated origin and destination address as a key
                    },
                    "aggregations": {
                        "aggr": {
                            "terms": {
                                "field": "carrierId" --> nested carrier count
                            }
                        }
                    }
                }
        }
    }

}

以下是我收到的响应模板。

"aggregations": {
"aggr": {
  "buckets": [
    {
      "key": "09-2021-06-2022",
      "from": 1630454400000,
      "from_as_string": "09-2021",
      "to": 1654041600000,
      "to_as_string": "06-2022",
      "doc_count": 1,
      "aggr": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "Dallas, TX, US|Houston, TX, US",
            "doc_count": 14,
            "aggr": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": 10022,
                  "doc_count": 14
                }
              ]
            }
          }
        ]
      }
    }
  ]
}

}

感谢大家的努力和时间。 如果您发现任何更好的方法,请告诉我。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM