I'm transforming an index that contains the following event, but the values inside the array fields are being split into separate events.
e.g. "serviceIdentifiers": "Redis", "serviceIdentifiers": "Event_Detector Service", etc.
{
"_index": "collated_txn_health_2022.05",
"_type": "_doc",
"_id": "LAUpboIBh6CUatILrsN3",
"_score": 1,
"_source": {
"timeInGMT": 0,
"kpiId": 0,
"compInstanceIdentifier": "d0352b7d-0484-4714-bbc8-eb67cbb7be70",
"agentIdentifier": "ComponentAgent-171",
"kpiIdentifier": "PACKETS_DROPPED",
"categoryIdentifier": "Network Utilization",
"applicationIdentifier": null,
"serviceIdentifiers": [
"Supervisor_Controller Service",
"Event_Detector Service",
"UI_Service",
"Redis",
"CC_Service"
],
"clusterIdentifiers": [
"a5c57ef5-4018-41b8-b727-27c8f8376c0e"
],
"collectionInterval": 60,
"value": "0.0",
"kpiType": "Core",
"groupAttribute": "ALL",
"groupIdentifier": null,
"watcherValue": null,
"errorCode": null,
"clusterOperation": null,
"aggLevelInMins": 1,
"error": false,
"kpiGroup": false,
"discovery": false,
"maintenanceExcluded": false,
"@timestamp": "2022-05-01T01:32:00.000Z"
}
The following is the transform job configuration:
curl -u admin:admin -XPUT "http://XXX.XXX.XX.XXX9201/_plugins/_transform/my-array-job-2" -H 'Content-type: application/json' -d'
{
"transform": {
"schedule": {
"interval": {
"start_time": 1659705000000,
"period": 1,
"unit": "Minutes"
}
},
"metadata_id": null,
"updated_at": 1659456180000,
"enabled": true,
"enabled_at": 1659457620000,
"description": "",
"source_index": "collated_txn_health_2022.05",
"data_selection_query": {
"match_all": {
"boost": 1
}
},
"target_index": "transform_collated_txn_health_2022.05",
"page_size": 1000,
"groups": [
{
"date_histogram": {
"fixed_interval": "1m",
"source_field": "@timestamp",
"target_field": "@timestamp",
"timezone": "Asia/Calcutta"
}
},
{
"terms": {
"source_field": "clusterIdentifiers",
"target_field": "clusterIdentifiers"
}
},
{
"terms": {
"source_field": "serviceIdentifiers",
"target_field": "serviceIdentifiers"
}
},
{
"terms": {
"source_field": "compInstanceIdentifier",
"target_field": "compInstanceIdentifier"
}
},
{
"terms": {
"source_field": "agentIdentifier",
"target_field": "agentIdentifier"
}
}
],
"aggregations": {
"count_@timestamp": {
"value_count": {
"field": "@timestamp"
}
}
}
}
}'
The following are the events from the transform index:
{
"_index": "transform_heal_collated_txn_health_2022.05",
"_type": "_doc",
"_id": "ybK0McQ9NZrt9xdo9iWKbA",
"_score": 1,
"_source": {
"transform._id": "my-array-job-2",
"transform._doc_count": 2,
"@timestamp": 1651365120000,
"clusterIdentifiers": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
"serviceIdentifiers": "Redis",
"compInstanceIdentifier": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
"agentIdentifier": "ComponentAgent-170",
"count_@timestamp": 2
}
},
{
"_index": "transform_heal_collated_txn_health_2022.05",
"_type": "_doc",
"_id": "Wf-4KwnFaYuw9bL-V-9WEQ",
"_score": 1,
"_source": {
"transform._id": "my-array-job-2",
"transform._doc_count": 2,
"@timestamp": 1651365120000,
"clusterIdentifiers": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
"serviceIdentifiers": "Redis_Server Service",
"compInstanceIdentifier": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
"agentIdentifier": "ComponentAgent-170",
"count_@timestamp": 2
}
It would be a great help if somebody could suggest a solution for handling array fields.
I have solved the issue with the following Painless script, which helps to transform array fields in OpenSearch.
PUT _plugins/_transform/my-array-job-2
{
"transform": {
"schedule": {
"interval": {
"start_time": 1659705000000,
"period": 1,
"unit": "Minutes"
}
},
"metadata_id": null,
"updated_at": 1659456180000,
"enabled": true,
"enabled_at": 1659457620000,
"description": "",
"source_index": "heal_collated_txn_heal_health_2022.05_reindex",
"target_index": "transform_heal_collated_txn_heal_health_2022.05",
"page_size": 1000,
"groups": [
{
"date_histogram": {
"fixed_interval": "1m",
"source_field": "@timestamp",
"target_field": "@timestamp",
"timezone": "Asia/Calcutta"
}
},
{
"terms": {
"source_field": "kpiIdentifier",
"target_field": "kpiIdentifier"
}
},
{
"terms": {
"source_field": "clusterIdentifiers",
"target_field": "clusterIdentifiers"
}
}
],
"aggregations": {
"count_@timestamp": {
"value_count": {
"field": "@timestamp"
}
},
"count_agentIdentifier": {
"value_count": {
"field": "agentIdentifier"
}
},
"sum_value": {
"sum": {
"field": "value"
}
},
"max_value": {
"max": {
"field": "value"
}
},
"avg_value": {
"avg": {
"field": "value"
}
},
"count_value": {
"value_count": {
"field": "value"
}
},
"percentiles_value": {
"percentiles": {
"field": "value",
"percents": [
95
],
"keyed": true,
"tdigest": {
"compression": 100
}
}
},
"serviceIdentifiers": {
"scripted_metric": {
"init_script": "state.docs = []",
"map_script": """
Map span = [
'url':doc['serviceIdentifiers']
];
state.docs.add(span)
""",
"combine_script": "return state.docs;",
"reduce_script": """
def all_docs = [];
for (s in states) {
for (span in s) {
all_docs.add(span);
}
}
def size = all_docs.size();
def serviceIdentifiers_1 = all_docs[0]['url'];
def ret = new HashMap();
ret['serviceIdentifiers'] = serviceIdentifiers_1;
return ret;
"""
}
}
}
}
}
The technical post pages of this site are published under the CC BY-SA 4.0 license. If you need to reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.