
JSON file from Filebeat to Logstash and then to Elasticsearch

I am trying to ingest inventory data which is produced in the following JSON file format.

{  
   "_meta":{  
      "hostvars":{  
         "host1":{  
            "foreman":{  
               "architecture_id":1,
               "architecture_name":"x86_64",
               "capabilities":[  
                  "build"
               ],
               "certname":"host1",
               "comment":"this is hostname1",
               "created_at":"2017-03-08T15:27:11Z",
               "disk":"10gb",
               "domain_id":5,

            },
            "foreman_facts":{  
               "boardmanufacturer":"Intel Corporation",
               "boardproductname":"440BX Desktop Reference Platform",
               "ipaddress":"1.1.1.1",
               "ipaddress_eth0":"1.1.1.2",
               "ipaddress_lo":"127.0.0.1",

            },
            "foreman_params":{  

            }
         },
         "host2":{  
            "foreman":{  
               "architecture_id":1,
               "architecture_name":"x86_64",
               "capabilities":[  
                  "build"
               ],
               "certname":"host2",
               "comment":"this hostname2",
               "created_at":"2017-03-08T15:27:11Z",
               "disk":"20gb",
               "domain_id":5,

            },
            "foreman_facts":{  
               "boardmanufacturer":"Intel Corporation",
               "boardproductname":"440BX Desktop Reference Platform",
               "ipaddress":"2.1.1.1",
               "ipaddress_eth0":"2.2.2.2",
               "ipaddress_lo":"127.0.0.1",

            },
            "foreman_params":{  

            }
         },
         "foreman_all":[  
            "host3",
            "host4",

         ],
         "foreman_environment: [ 
         "computer1",
         "computer2"
      ],

I managed to get the data into Elasticsearch using the following configuration.

Filebeat config:

multiline.pattern: '^{'

multiline.negate: true

multiline.match: after

output.logstash:
  # The Logstash hosts
  hosts: ["localhost:5044"]
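
For reference, these multiline settings are assumed to sit under the prospector definition in filebeat.yml, roughly like the sketch below (the path is a placeholder taken from the fuller config further down):

filebeat.prospectors:
  - type: log
    paths:
      - /opt/uploaddata/*.json
    multiline.pattern: '^{'
    multiline.negate: true
    multiline.match: after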

Logstash config:

input {
  beats {
    port => "5044"
  }
}

output {
  elasticsearch {
    hosts => [ "10.1.7.5:9200" ]
    index => "inventory-%{+YYYY-MM-dd}"
  }
  stdout {}
}

However, I have noticed that Filebeat treats the whole JSON file as one message. I am wondering if I can break that message up, send only the hostvars section, index one document per hostname, and ignore the foreman_all and foreman_environment fields in the JSON data above. The above is sample data; I have to ingest around 100k records, so I want to send as little data as possible over the network.

I want to ingest the data into Elasticsearch in the following format. I am wondering if someone can advise on the best config to use.

Elastic doc id 1

computer name : "host1"
"architecture_id": 1,
"architecture_name": "x86_64",
"capabilities": ["build"],
"Company hardware name": "host1",
"comment": "this is hostname1",
"created_at": "2017-03-08T15:27:11Z",
"disk": "10gb",
"domain_id": 5,
"foreman_facts": {
"boardmanufacturer": "Intel Corporation",
"boardproductname": "440BX Desktop Reference Platform",
"ipaddress": "1.1.1.1",
"ipaddress_eth0": "1.1.1.2",
"ipaddress_lo": "127.0.0.1",

Elastic doc id 2

"computer name"" : "host2"
"architecture_id": 1,
"architecture_name": "x86_64",
"capabilities": ["build"],
"certname": "host2",
"comment": "this hostname2",
"created_at": "2017-03-08T15:27:11Z",
"disk": "20gb",
"domain_id": 5,
"boardmanufacturer": "Intel Corporation",
"boardproductname": "440BX Desktop Reference Platform",
"ipaddress": "2.1.1.1",
"ipaddress_eth0": "2.2.2.2",
"ipaddress_lo": "127.0.0.1",

  1. First, you should set document_type in filebeat.yml like this:

     filebeat:
       prospectors:
         - input_type: log
           paths:
             - "/home/ubuntu/data/test.json"
           document_type: json
           json.message_key: log
           json.keys_under_root: true
           json.overwrite_keys: true

And have a look at this, which may help: https://www.elastic.co/blog/structured-logging-filebeat

  2. Then you can parse the JSON value in Logstash and put it into a new field (configured in logstash.conf):

     json {
       source => "parameter"
       target => "parameterData"
       remove_field => "parameter"
     }

Documentation: https://www.elastic.co/guide/en/logstash/current/plugins-filters-json.html

  3. You can use stdin and stdout in Logstash for testing, as in the sketch below.
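
A minimal test pipeline along these lines (the file name test.conf is just an example) might be:

input {
  stdin { }
}

filter {
  json {
    source => "message"
  }
}

output {
  stdout { codec => rubydebug }
}

Run it with bin/logstash -f test.conf and paste a JSON document on standard input to see how the filters transform it.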

I have used the configs below as per your suggestion and I am seeing the JSON error messages shown further down. It looks like Filebeat is sending each line individually; if I use the multiline option as mentioned below, then Filebeat and Logstash send the whole JSON file as one message. What I am looking for is to break the message up by hostname, as mentioned above.

multiline.pattern: '^{'

multiline.negate: true

multiline.match: after

#=========================== Filebeat Configuration =============================

filebeat.prospectors:


- type: log

  # Change to true to enable this prospector configuration.
  enabled: true

  # Paths that should be crawled and fetched. Glob based paths.
  paths:
    - /opt/uploaddata/*.json
    #- c:\programdata\elasticsearch\logs\*


  ### JSON configuration

  document_type: json

  json.message_key: log


  json.keys_under_root: true

  json.overwrite_keys: true

  #json.add_error_key: false


output.logstash:
  # The Logstash hosts
  hosts: ["localhost:5044"]


#=========================== Logstash  =============================
input {
  beats {
    port => "5044"
  }
}

filter {
  json {
    source => "parameter"
    target => "parameterData"
    remove_field => "parameter"
  }
}

output {
  elasticsearch {
    hosts => [ "10.138.7.51:9200" ]
    index => "inventory-%{+YYYY-MM-dd}"
  }
  stdout {
    codec => rubydebug
  }
}

#=========================== Filebeat Errors =============================

2017/11/24 16:45:14.226665 json.go:32: ERR Error decoding JSON: json: cannot unmarshal string into Go value of type map[string]interface {}
2017/11/24 16:45:14.226757 processor.go:262: DBG Publish event: {
  "@timestamp": "2017-11-24T16:45:14.226Z",
  "@metadata": {
    "beat": "filebeat",
    "type": "doc",
    "version": "6.0.0"
  },
  "json": {},
  "message": "            \"host4\",",
  "prospector": {
    "type": "log"
  },
  "beat": {
    "name": "filebeat",
    "hostname": "filebeat",
    "version": "6.0.0"
  },
  "source": "/opt/uploaddata/data.json",
  "offset": 1710
}
2017/11/24 16:45:14.226800 json.go:32: ERR Error decoding JSON: EOF
2017/11/24 16:45:14.226889 processor.go:262: DBG Publish event: {
  "@timestamp": "2017-11-24T16:45:14.226Z",
  "@metadata": {
    "beat": "filebeat",
    "type": "doc",
    "version": "6.0.0"
  },
  "json": {},
  "message": "",
  "source": "/opt/uploaddata/data.json",
  "offset": 1712,
  "prospector": {
    "type": "log"
  },
  "beat": {
    "name": "filebeat",
    "hostname": "filebeat",
    "version": "6.0.0"
  }


#=========================== Logstash Logs  =============================

{
    "@timestamp" => 2017-11-24T16:45:14.226Z,
        "offset" => 1638,
      "@version" => "1",
          "beat" => {
            "name" => "filebeat",
        "hostname" => "filebeat",
         "version" => "6.0.0"
    },
          "host" => "filebeat",
    "prospector" => {
        "type" => "log"
    },
          "json" => {},
        "source" => "/opt/uploaddata/data.json",
       "message" => "         },",
          "tags" => [
        [0] "beats_input_codec_plain_applied"
    ]
}
{
    "@timestamp" => 2017-11-24T16:45:14.226Z,
        "offset" => 1666,
      "@version" => "1",
          "beat" => {
            "name" => "filebeat",
        "hostname" => "filebeat",
         "version" => "6.0.0"
    },
          "host" => "filebeat",
          "json" => {},
    "prospector" => {
        "type" => "log"
    },
        "source" => "/opt/uploaddata/data.json",
       "message" => "         \"foreman_all\":[  ",
          "tags" => [
        [0] "beats_input_codec_plain_applied"
    ]
}
