I am logging analytics for a flow. The field "start" is set to "true" when a flow starts, and the field "end" is set to "true" when a flow ends. Some flows may never get a document with "end" set to "true". I want to find exactly where those flows stop.
I have tried a nested aggregation, but I am not able to get only the documents of the flows that have not ended.
This is the data stored in Elasticsearch:
[
{
"date": 1565094409535,
"start": "true",
"end": "",
"message": "Select Option",
"context": "third",
"account_id": "123"
},
{
"date": 1565094411152,
"start": "",
"end": "",
"message": "Select Me",
"context": "third",
"account_id": "123"
},
{
"date": 1565094409652,
"start": "true",
"end": "",
"message": "Select option",
"context": "third",
"account_id": "123"
},
{
"date": 1565094409751,
"start": "",
"end": "",
"message": "Select Me",
"context": "third",
"account_id": "123"
},
{
"date": 1565094411187,
"start": "",
"end": "true",
"message": "Bye Bye",
"context": "third",
"account_id": "123"
},
{
"date": 1565094411211,
"start": "true",
"end": "",
"message": "Select option",
"context": "third",
"account_id": "123"
},
{
"date": 1565094411311,
"start": "true",
"end": "",
"message": "How are you",
"context": "second",
"account_id": "123"
}
]
Query used:
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "context.keyword": "third"
          }
        }
      ]
    }
  },
  "aggs": {
    "sessions": {
      "terms": {
        "field": "account_id.keyword",
        "size": 25000
      },
      "aggs": {
        "top_sessions_hits": {
          "top_hits": {
            "sort": [
              {
                "date": {
                  "order": "asc"
                }
              }
            ],
            "_source": {
              "includes": ["date", "message", "account_id", "start", "end", "context"]
            },
            "size": 10000
          }
        }
      }
    }
  }
}
I am getting the below output
{
"took": 37,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 0,
"hits": []
},
"aggregations": {
"sessions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "123",
"doc_count": 6,
"top_sessions_hits": {
"hits": {
"total": 6,
"max_score": null,
"hits": [
{
"_index": "messages",
"_type": "doc",
"_id": "********",
"_score": null,
"_source": {
"date": 1565094409535,
"start": "true",
"end": "",
"message": "Select Option",
"context": "third",
"account_id": "123"
},
"sort": [
1565094409535
]
},{
"_index": "messages",
"_type": "doc",
"_id": "********",
"_score": null,
"_source": {
"date": 1565094411152,
"start": "",
"end": "",
"message": "Select Me",
"context": "third",
"account_id": "123"
},
"sort": [
1565094411152
]
},
{
"_index": "messages",
"_type": "doc",
"_id": "********",
"_score": null,
"_source": {
"date": 1565094409652,
"start": "true",
"end": "",
"message": "Select option",
"context": "third",
"account_id": "123"
},
"sort": [
1565094409652
]
},
{
"_index": "messages",
"_type": "doc",
"_id": "********",
"_score": null,
"_source": {
"date": 1565094409751,
"start": "",
"end": "",
"message": "Select Me",
"context": "third",
"account_id": "123"
},
"sort": [
1565094409751
]
},
{
"_index": "messages",
"_type": "doc",
"_id": "********",
"_score": null,
"_source": {
"date": 1565094411187,
"start": "",
"end": "true",
"message": "Bye Bye",
"context": "third",
"account_id": "123"
},
"sort": [
1565094411187
]
},
{
"_index": "messages",
"_type": "doc",
"_id": "********",
"_score": null,
"_source": {
"date": 1565094411211,
"start": "true",
"end": "",
"message": "Select option",
"context": "third",
"account_id": "123"
},
"sort": [
1565094411211
]
}
]
}
}
}
]
}
}
}
But I don't want to get documents #3, #4 and #5, since that flow was completed.
I am very new to Elasticsearch. Since the core developer is on leave, I am trying to fix a few issues. Please guide me on how to get only documents #1, #2 and #6.
I understand that there are 2 messages in each flow - one with "start":true and one with "end":true. In order to find the flows that have only a start but no end, you need a unique identifier on each flow, for example a flow_id field.
If the messages contain a flow_id, you could run a terms aggregation on flow_id to count how many messages exist for each flow, and then sort the buckets in ascending order of their _count - the first buckets would be those with count=1, i.e. flows which have only a start and no end.
The query should look like:
GET /flows_index/_search
{
  "size": 0,
  "aggs": {
    "flow_id_agg": {
      "terms": {
        "field": "flow_id",
        "order": {
          "_count": "asc"
        }
      },
      "aggs": {
        "flow_id_samples": {
          "top_hits": {
            "sort": [
              {
                "date": {
                  "order": "asc"
                }
              }
            ],
            "_source": {
              "includes": ["date", "message", "account_id", "start", "end", "context"]
            },
            "size": 10000
          }
        }
      }
    }
  }
}
Take a look at a similar need here: Elasticsearch terms aggregation and querying
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.