Elastic Search - Is there a way to ignore certain documents based on condition during aggregation?
I am logging analytics for flows. When a flow starts, the field "start" is set to "true", and when the flow ends, the field "end" is set to "true". A few flows may never get the "end" field set to "true". I want to find the point where those flows stopped.
I tried using nested aggregations, but I am not able to get the documents of the flows that did not end.
Here is the data stored in Elasticsearch:
[
  {
    "date": 1565094409535,
    "start": "true",
    "end": "",
    "message": "Select Option",
    "context": "third",
    "account_id": "123"
  },
  {
    "date": 1565094411152,
    "start": "",
    "end": "",
    "message": "Select Me",
    "context": "third",
    "account_id": "123"
  },
  {
    "date": 1565094409652,
    "start": "true",
    "end": "",
    "message": "Select option",
    "context": "third",
    "account_id": "123"
  },
  {
    "date": 1565094409751,
    "start": "",
    "end": "",
    "message": "Select Me",
    "context": "third",
    "account_id": "123"
  },
  {
    "date": 1565094411187,
    "start": "",
    "end": "true",
    "message": "Bye Bye",
    "context": "third",
    "account_id": "123"
  },
  {
    "date": 1565094411211,
    "start": "true",
    "end": "",
    "message": "Select option",
    "context": "third",
    "account_id": "123"
  },
  {
    "date": 1565094411311,
    "start": "true",
    "end": "",
    "message": "How are you",
    "context": "second",
    "account_id": "123"
  }
]
Query used:
{
  "size": 0,
  "query": {
    "bool": {
      "must": [{
        "term": {
          "context.keyword": "third"
        }
      }]
    }
  },
  "aggs": {
    "sessions": {
      "terms": {
        "field": "account_id.keyword",
        "size": 25000
      },
      "aggs": {
        "top_sessions_hits": {
          "top_hits": {
            "sort": [{
              "date": {
                "order": "asc"
              }
            }],
            "_source": {
              "includes": ["date", "message", "account_id", "start", "end", "context"]
            },
            "size": 10000
          }
        }
      }
    }
  }
}
I get the following output:
{
  "took": 37,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "sessions": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "123",
          "doc_count": 6,
          "top_sessions_hits": {
            "hits": {
              "total": 6,
              "max_score": null,
              "hits": [
                {
                  "_index": "messages",
                  "_type": "doc",
                  "_id": "********",
                  "_score": null,
                  "_source": {
                    "date": 1565094409535,
                    "start": "true",
                    "end": "",
                    "message": "Select Option",
                    "context": "third",
                    "account_id": "123"
                  },
                  "sort": [
                    1565094409535
                  ]
                },
                {
                  "_index": "messages",
                  "_type": "doc",
                  "_id": "********",
                  "_score": null,
                  "_source": {
                    "date": 1565094411152,
                    "start": "",
                    "end": "",
                    "message": "Select Me",
                    "context": "third",
                    "account_id": "123"
                  },
                  "sort": [
                    1565094411152
                  ]
                },
                {
                  "_index": "messages",
                  "_type": "doc",
                  "_id": "********",
                  "_score": null,
                  "_source": {
                    "date": 1565094409652,
                    "start": "true",
                    "end": "",
                    "message": "Select option",
                    "context": "third",
                    "account_id": "123"
                  },
                  "sort": [
                    1565094409652
                  ]
                },
                {
                  "_index": "messages",
                  "_type": "doc",
                  "_id": "********",
                  "_score": null,
                  "_source": {
                    "date": 1565094409751,
                    "start": "",
                    "end": "",
                    "message": "Select Me",
                    "context": "third",
                    "account_id": "123"
                  },
                  "sort": [
                    1565094409751
                  ]
                },
                {
                  "_index": "messages",
                  "_type": "doc",
                  "_id": "********",
                  "_score": null,
                  "_source": {
                    "date": 1565094411187,
                    "start": "",
                    "end": "true",
                    "message": "Bye Bye",
                    "context": "third",
                    "account_id": "123"
                  },
                  "sort": [
                    1565094411187
                  ]
                },
                {
                  "_index": "messages",
                  "_type": "doc",
                  "_id": "********",
                  "_score": null,
                  "_source": {
                    "date": 1565094411211,
                    "start": "true",
                    "end": "",
                    "message": "Select option",
                    "context": "third",
                    "account_id": "123"
                  },
                  "sort": [
                    1565094411211
                  ]
                }
              ]
            }
          }
        }
      ]
    }
  }
}
But I do not want to get documents #3, #4 and #5, since that flow has completed.
I am very new to Elasticsearch; I am trying to fix a few things while the core person is on vacation. Please guide me on how to get only documents #1, #2 and #6.
As I understand it, each flow has 2 marker messages - one with "start": true and one with "end": true. To find the flows that have only a start and no end, you need a unique identifier on each flow, say flow-id.
If the messages contain a flow-id, you can run a terms aggregation on the flow ID to count how many messages exist per flow, and then sort the buckets by _count in ascending order - the first buckets will have a count of 1, i.e. flows that only started and never ended.
The query should look something like this:
GET /flows_index/_search
{
  "size": 0,
  "aggs": {
    "flow_id_agg": {
      "terms": {
        "field": "flow_id",
        "order": {
          "_count": "asc"
        }
      },
      "aggs": {
        "flow_id_samples": {
          "top_hits": {
            "sort": [{
              "date": {
                "order": "asc"
              }
            }],
            "_source": {
              "includes": ["date", "message", "account_id", "start", "end", "context"]
            },
            "size": 10000
          }
        }
      }
    }
  }
}
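One caveat: ordering by _count and expecting unfinished flows to show up with a count of 1 only holds if a flow consists of exactly the start and end markers; in the sample data above a flow also carries intermediate messages such as "Select Me", so a completed flow has more than two documents. A more direct variant is to keep only the flow buckets that contain no document with end set to "true". Below is a minimal sketch of that idea using a max metric sub-aggregation plus a bucket_selector pipeline aggregation. It relies on the same hypothetical flow_id field as the query above, and it assumes end is indexed with a .keyword sub-field, just like context.keyword and account_id.keyword in the original query - adjust the field names to your actual mapping.

GET /flows_index/_search
{
  "size": 0,
  "aggs": {
    "flow_id_agg": {
      "terms": {
        "field": "flow_id",
        "size": 10000
      },
      "aggs": {
        "has_end": {
          "max": {
            "script": {
              "source": "doc['end.keyword'].size() > 0 && doc['end.keyword'].value == 'true' ? 1 : 0"
            }
          }
        },
        "unfinished_flows_only": {
          "bucket_selector": {
            "buckets_path": {
              "hasEnd": "has_end"
            },
            "script": "params.hasEnd == 0"
          }
        },
        "flow_id_samples": {
          "top_hits": {
            "sort": [{ "date": { "order": "asc" } }],
            "_source": {
              "includes": ["date", "message", "account_id", "start", "end", "context"]
            },
            "size": 10000
          }
        }
      }
    }
  }
}

The bucket_selector runs after the per-flow buckets have been built, so it only removes completed flows from the response; the terms aggregation still has to visit every flow, which is why grouping on a proper per-flow identifier rather than account_id remains the important part.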
Take a look at a similar requirement here: Elasticsearch term aggregation and querying