简体   繁体   中英

How to access the date_histogram key field in the child aggregation in elasticsearch?

I want to apply some filters on the bucket response generated by the date_histogram , that filter is dependent on the key of the date_histogram output buckets.

Suppose I have following data in

{
   "entryTime":"",
   "soldTime:""
}

the elastic query is something like this

{
  "aggs": {
    "date": {
      "date_histogram": {
        "field": "entryTime",
        "interval": "month",
        "keyed": true
      },
      "aggs": {
        "filter_try": {
          "filter": {
            "bool": {
              "must": [
                {
                  "range": {
                    "entryTime": {
                      "lte": 1588840533000
                    }
                  }
                },
                {
                  "bool": {
                    "should": [
                      {
                        "bool": {
                          "must": [
                            {
                              "exists": {
                                "field": "soldTime"
                              }
                            },
                            {
                              "range": {
                                "soldTime": {
                                  "gt": 1588840533000
                                }
                              }
                            }
                          ]
                        }
                      },
                      {
                        "bool": {
                          "must_not": [
                            {
                              "exists": {
                                "field": "soldTime"
                              }
                            }
                          ]
                        }
                      }
                    ]
                  }
                }
              ]
            }
          }
        }
      }
    }
  }
}

so here in that bool query, I want to use the date generated for the specific bucket by date_histogram aggregation in both the range clauses instead of the hardcoded epoch time.

Even if we can access using script then also it's fine.

for further clarification, this is the boolean query and in the query want to replace this "DATE" with the date_histogram bucket key.

# (entryTime < DATE) 
# AND 
# (
#    (soldTime != null AND soldTime > DATE) 
#          OR 
#      (soldTime == NULL)
#  )

Consider below 10 Document I have:

"hits" : [
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1577869200000",
          "soldTime" : "1578646800000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1578214800000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "3",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1578560400000",
          "soldTime" : "1579942800000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "4",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1579683600000",
          "soldTime" : "1581325200000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "5",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1580893200000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "6",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1582189200000",
          "soldTime" : "1582362000000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "7",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1582621200000",
          "soldTime" : "1584349200000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "8",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1583053200000",
          "soldTime" : "1583830800000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "9",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1584262800000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "10",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1585472400000"
        }
      }
    ]

Now the end of January 2020 in epoch is -> 1580515199000

So if I apply on the above-mentioned bool query,

Will get the output as the

"hits" : [
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "4",
        "_score" : 3.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1579683600000",
          "soldTime" : "1581325200000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1578214800000"
        }
      }
    ]

As document with ID 4 satisfy (soldTime != null AND soldTime > DATE) and document with ID 2 satisfy (soldTime == null) condition from OR part.

Now for the same bool request If I use the date of end February 2020 -> 1583020799000 , will get the hits as follows

"hits" : [
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "7",
        "_score" : 3.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1582621200000",
          "soldTime" : "1584349200000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1578214800000"
        }
      },
      {
        "_index" : "vi_test",
        "_type" : "_doc",
        "_id" : "5",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1580893200000"
        }
      }
    ]
  • ID 7: Entry in Feb, but sold in March so is in stock for Feb-2020
  • ID 2: Entry in Jan, not sold yet means in the stock
  • ID 5: Entry in Feb, not sold yet means in the stock

Now the same data required for each end of the month of a whole year to plot the trend.

Thank you

I couldn't find a way using normal queries as parent aggregation key is not available in sub aggregation. I have written a script for this which selects documents where soldTime is either null or doesnot fall in same month as entryTime

{
  "query": {
    "script": {
      "script": """
         ZonedDateTime entry;
         ZonedDateTime sold;
         if(doc['entryTime'].size()>0)
         {
           entry= doc['entryTime'].value;
         }
         if(doc['soldTime'].size()>0) 
         {
           sold = doc['soldTime'].value;
         }
         if(sold==null || ( entry.getMonthValue()!==sold.getMonthValue()|| entry.getYear()!==sold.getYear()))
         {
           return true;
         }
         else false;
"""
    }
  },
  "size": 10,
  "aggs": {
    "monthly_trend": {
      "date_histogram": {
        "field": "entryTime",
        "interval": "month"
      },
      "aggs": {
        "docs": {
          "top_hits": {
            "size": 10
          }
        }
      }
    }
  }
}

    "hits" : [
      {
        "_index" : "index22",
        "_type" : "_doc",
        "_id" : "55Kv83EB8a54AbXfngYU",
        "_score" : 1.0,
        "_source" : {
          "deaerId" : "4",
          "entryTime" : "1578214800000"
        }
      }
    ]
  },
  "aggregations" : {
    "monthly_trend" : {
      "buckets" : [
        {
          "key_as_string" : "2020-01-01T00:00:00.000Z",
          "key" : 1577836800000,
          "doc_count" : 1,
          "docs" : {
            "hits" : {
              "total" : {
                "value" : 1,
                "relation" : "eq"
              },
              "max_score" : 1.0,
              "hits" : [
                {
                  "_index" : "index22",
                  "_type" : "_doc",
                  "_id" : "55Kv83EB8a54AbXfngYU",
                  "_score" : 1.0,
                  "_source" : {
                    "deaerId" : "4",
                    "entryTime" : "1578214800000"
                  }
                }
              ]
            }
          }
        }
      ]
    }
  }

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM