简体   繁体   中英

Opensearch transform splitting array values to new events

I'm transforming an index that contains the following event, but the values inside the array fields are being split into separate events in the target index.

e.g. "serviceIdentifiers": "Redis", "serviceIdentifiers": "Event_Detector Service", etc.

      {
        "_index": "collated_txn_health_2022.05",
        "_type": "_doc",
        "_id": "LAUpboIBh6CUatILrsN3",
        "_score": 1,
        "_source": {
          "timeInGMT": 0,
          "kpiId": 0,
          "compInstanceIdentifier": "d0352b7d-0484-4714-bbc8-eb67cbb7be70",
          "agentIdentifier": "ComponentAgent-171",
          "kpiIdentifier": "PACKETS_DROPPED",
          "categoryIdentifier": "Network Utilization",
          "applicationIdentifier": null,
          "serviceIdentifiers": [
            "Supervisor_Controller Service",
            "Event_Detector Service",
            "UI_Service",
            "Redis",
            "CC_Service"
          ],
          "clusterIdentifiers": [
            "a5c57ef5-4018-41b8-b727-27c8f8376c0e"
          ],
          "collectionInterval": 60,
          "value": "0.0",
          "kpiType": "Core",
          "groupAttribute": "ALL",
          "groupIdentifier": null,
          "watcherValue": null,
          "errorCode": null,
          "clusterOperation": null,
          "aggLevelInMins": 1,
          "error": false,
          "kpiGroup": false,
          "discovery": false,
          "maintenanceExcluded": false,
          "@timestamp": "2022-05-01T01:32:00.000Z"
        }
      }

Following is the transform job configuration.

# Creates the transform job "my-array-job-2" on the OpenSearch node (host redacted, port 9201).
# Note: the "terms" groups on the array fields clusterIdentifiers and serviceIdentifiers
# produce one bucket per array element, so every element ends up in its own target document.
curl -u admin:admin -XPUT "http://XXX.XXX.XX.XXX:9201/_plugins/_transform/my-array-job-2" -H 'Content-type: application/json' -d'
{
    "transform": {
        "schedule": {
            "interval": {
                "start_time": 1659705000000,
                "period": 1,
                "unit": "Minutes"
            }
        },
        "metadata_id": null,
        "updated_at": 1659456180000,
        "enabled": true,
        "enabled_at": 1659457620000,
        "description": "",
        "source_index": "collated_txn_health_2022.05",
        "data_selection_query": {
            "match_all": {
                "boost": 1
            }
        },
        "target_index": "transform_collated_txn_health_2022.05",
        "page_size": 1000,
        "groups": [
            {
                "date_histogram": {
                    "fixed_interval": "1m",
                    "source_field": "@timestamp",
                    "target_field": "@timestamp",
                    "timezone": "Asia/Calcutta"
                }
            },
            {
                "terms": {
                    "source_field": "clusterIdentifiers",
                    "target_field": "clusterIdentifiers"
                }
            },
            {
                "terms": {
                    "source_field": "serviceIdentifiers",
                    "target_field": "serviceIdentifiers"
                }
            },
            {
                "terms": {
                    "source_field": "compInstanceIdentifier",
                    "target_field": "compInstanceIdentifier"
                }
            },
            {
                "terms": {
                    "source_field": "agentIdentifier",
                    "target_field": "agentIdentifier"
                }
            }
        ],
        "aggregations": {
            "count_@timestamp": {
                "value_count": {
                    "field": "@timestamp"
                }
            }
        }
    }
}'

Following are the events from the transform index.

      {
        "_index": "transform_heal_collated_txn_health_2022.05",
        "_type": "_doc",
        "_id": "ybK0McQ9NZrt9xdo9iWKbA",
        "_score": 1,
        "_source": {
          "transform._id": "my-array-job-2",
          "transform._doc_count": 2,
          "@timestamp": 1651365120000,
          "clusterIdentifiers": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
          "serviceIdentifiers": "Redis",
          "compInstanceIdentifier": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
          "agentIdentifier": "ComponentAgent-170",
          "count_@timestamp": 2
        }
      },
      {
        "_index": "transform_heal_collated_txn_health_2022.05",
        "_type": "_doc",
        "_id": "Wf-4KwnFaYuw9bL-V-9WEQ",
        "_score": 1,
        "_source": {
          "transform._id": "my-array-job-2",
          "transform._doc_count": 2,
          "@timestamp": 1651365120000,
          "clusterIdentifiers": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
          "serviceIdentifiers": "Redis_Server Service",
          "compInstanceIdentifier": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
          "agentIdentifier": "ComponentAgent-170",
          "count_@timestamp": 2
        }
      }

It would be a great help if somebody could suggest a solution for handling array fields.

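I have solved the issue with the following Painless script, which helps to transform array fields in OpenSearch. Instead of grouping on serviceIdentifiers with a terms group, the array is collected with a scripted_metric aggregation, so it is kept intact in the target document.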

PUT _plugins/_transform/my-array-job-2
{
  "transform": {
    "schedule": {
      "interval": {
        "start_time": 1659705000000,
        "period": 1,
        "unit": "Minutes"
      }
    },
    "metadata_id": null,
    "updated_at": 1659456180000,
    "enabled": true,
    "enabled_at": 1659457620000,
    "description": "",
    "source_index": "heal_collated_txn_heal_health_2022.05_reindex",
    "target_index": "transform_heal_collated_txn_heal_health_2022.05",
    "page_size": 1000,
    "groups": [
      {
        "date_histogram": {
          "fixed_interval": "1m",
          "source_field": "@timestamp",
          "target_field": "@timestamp",
          "timezone": "Asia/Calcutta"
        }
      },
      {
        "terms": {
          "source_field": "kpiIdentifier",
          "target_field": "kpiIdentifier"
        }
      },
      {
        "terms": {
          "source_field": "clusterIdentifiers",
          "target_field": "clusterIdentifiers"
        }
      }
    ],
    "aggregations": {
      "count_@timestamp": {
        "value_count": {
          "field": "@timestamp"
        }
      },
      "count_agentIdentifier": {
        "value_count": {
          "field": "agentIdentifier"
        }
      },
      "sum_value": {
        "sum": {
          "field": "value"
        }
      },
      "max_value": {
        "max": {
          "field": "value"
        }
      },
      "avg_value": {
        "avg": {
          "field": "value"
        }
      },
      "count_value": {
        "value_count": {
          "field": "value"
        }
      },
      "percentiles_value": {
        "percentiles": {
          "field": "value",
          "percents": [
            95
          ],
          "keyed": true,
          "tdigest": {
            "compression": 100
          }
        }
      },
      "serviceIdentifiers": {
        "scripted_metric": {
          "init_script": "state.docs = []",
          "map_script": """
            // Collect the whole serviceIdentifiers array of each document instead of
            // grouping on it (a terms group emits one bucket per array element).
            // Store a copy: the doc-values object may be reused for the next document.
            state.docs.add(new ArrayList(doc['serviceIdentifiers']));
          """,
          "combine_script": "return state.docs;",
          "reduce_script": """
            // Report the serviceIdentifiers of the first document collected for the bucket.
            // Falls back to an empty list when no shard contributed a document, so the
            // script never indexes into an empty list.
            def ret = new HashMap();
            ret['serviceIdentifiers'] = [];
            for (s in states) {
              if (s != null && !s.isEmpty()) {
                ret['serviceIdentifiers'] = s[0];
                break;
              }
            }
            return ret;
          """
        }
      }
    }
  }
}

The technical posts on this site are licensed under CC BY-SA 4.0. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM