
How to keep Logstash from parsing some fields?

I have a log file that looks like this (simplified):

 { "startDate": "2015-05-27", "endDate": "2015-05-27", 
    "request" : {"requestId":"123","field2":1,"field2": 2,"field3":3, ....} }

Logstash tries to parse all fields, including the "request" field. Is it possible to skip parsing this field?
I want to see the "request" field in Elasticsearch, but it shouldn't be parsed.

Here is part of my config file:

input {
    file {
        type => "json"
        path => [
                "/var/log/service/restapi.log"
        ]
        tags => ["restapi"]
    }
}

filter {
    ruby {
        init => "require 'socket'"
        code => "
           event['host'] = Socket.gethostname.gsub(/\..*/, '')
           event['request'] = (event['request'].to_s);
        "
    }

    if "restapi" in [tags] {
        json {
            source => "message"
        }
        date {
                match => [ "date_start", "yyyy-MM-dd HH:mm:ss" ]
                target => "date_start"
         }
        date {
                match => [ "date_end", "yyyy-MM-dd HH:mm:ss" ]
                target => "date_end"
        }
        date {
                match => [ "date", "yyyy-MM-dd HH:mm:ss" ]
                target => "date"
        }
    }
}
output {
    if "restapi" in [tags] {
        elasticsearch {
            hosts => ["......."]
            template_name => "logs"
            template => "/etc/logstash/templates/service.json"
            template_overwrite => true
            index => "service-logs-%{+YYYY.MM.dd}"
            idle_flush_time => 20
            flush_size => 500
        }
    }
}

Here is my template file:

{
  "template" : "service-*",
  "settings" : {
    "index": {
            "refresh_interval": "60s",
            "number_of_shards": 6,
            "number_of_replicas": 2
        }
  },
  "mappings" : {
    "logs" : {
        "properties" : {
        "@timestamp" : { "type" : "date", "format" : "dateOptionalTime" },
        "@version" : { "type" : "integer", "index" : "not_analyzed" },
        "message": { "type" : "string", "norms" : { "enabled" : false } },
        "method" : { "type" : "string", "index" : "not_analyzed" },
        "traffic_source" : { "type" : "string", "index" : "not_analyzed" },
        "request_path" : { "type" : "string", "index" : "not_analyzed" },
        "status" : { "type" : "integer", "index" : "not_analyzed" },
        "host_name" : { "type" : "string", "index" : "not_analyzed" },
        "environment" : { "type" : "string", "index" : "not_analyzed" },
        "action" : { "type" : "string", "index" : "not_analyzed" },
        "request_id" : { "type" : "string", "index" : "not_analyzed" },
        "date" : { "type" : "date", "format" : "dateOptionalTime" },
        "date_start" : { "type" : "date", "format" : "dateOptionalTime" },
        "date_end" : { "type" : "date", "format" : "dateOptionalTime" },
        "adnest_type" : { "type" : "string", "index" : "not_analyzed" },
        "request" : { "type" : "string", "index" : "not_analyzed" }
      }
    }
  }
}

Here is the error from logstash.log:

response=>{"create"=>{"_index"=>"logs-2017.02.08", "_type"=>"json", "_id"=>"AVoeNgdhD5iEO87EVF_n", "status"=>400, "error"=>{"type"=>"mapper_parsing_exception", "reason"=>"failed to parse [request]", "caused_by"=>{"type"=>"illegal_argument_exception", "reason"=>"unknown property [requestId]"}}}}, :level=>:warn}

You should be able to do this with a ruby filter. Note that in your original config the ruby filter runs before the json filter, so event['request'] is still empty when your to_s runs; the json filter then overwrites request with the parsed object, which is exactly what Elasticsearch rejects. Serialize the field after it has been parsed instead:

filter {
    ruby {
        init => "require 'socket'"
        code => "
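           # keep only the short hostname (strip the domain part)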
           event['host'] = Socket.gethostname.gsub(/\..*/, '')
           event['request'] = (event['request'].to_s);
        "
    }

    if "restapi" in [tags] {
        ruby {
                code => '
                    require "json"
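                    # re-serialize the parsed request hash into a JSON string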
                    event.set("request",event.get("request").to_json)'
        }
        date {
                match => [ "date_start", "yyyy-MM-dd HH:mm:ss" ]
                target => "date_start"
         }
        date {
                match => [ "date_end", "yyyy-MM-dd HH:mm:ss" ]
                target => "date_end"
        }
        date {
                match => [ "date", "yyyy-MM-dd HH:mm:ss" ]
                target => "date"
        }
    }
}

When testing this with stubbed-out stdin/stdout:

input {
 stdin { codec => json }
}
# the filter {} block above goes here
output {
  stdout { codec=>rubydebug}
}

And testing like this:

echo '{ "startDate": "2015-05-27", "endDate": "2015-05-27", "request" : {"requestId":"123","field2":1,"field2": 2,"field3":3} }' | bin/logstash -f test.conf

It outputs this:

{
     "startDate" => "2015-05-27",
       "endDate" => "2015-05-27",
       "request" => "{\"requestId\"=>\"123\", \"field2\"=>2, \"field3\"=>3}",
      "@version" => "1",
    "@timestamp" => "2017-02-09T14:37:02.789Z",
          "host" => "xxxx"
}
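Note that the request value above is Ruby's Hash#to_s output (with => arrows), not valid JSON: the stdin test event carries no "restapi" tag, so only the first ruby block ran and the to_json conversion was skipped. If you want a real JSON string stored in Elasticsearch, here is a minimal standalone sketch (assuming the Logstash 5.x event.get/event.set API, with the tag condition and the earlier to_s left out):

input {
  stdin { codec => json }
}
filter {
  ruby {
    code => '
      require "json"
      # re-serialize the parsed "request" hash into a proper JSON string
      event.set("request", event.get("request").to_json)'
  }
}
output {
  stdout { codec => rubydebug }
}

With the same echo test, request should then come out as something like "{\"requestId\":\"123\",\"field2\":2,\"field3\":3}", a plain string that Elasticsearch will store without trying to map its inner fields.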

So I've answered your original question. You should ask another question if you can't figure out why your template isn't working.

Elasticsearch analyzes string fields by default. If all you need is to keep the request field from being analyzed, change how it is indexed by setting "index": "not_analyzed" in the field's mapping.

More info is in the Elasticsearch mapping documentation.
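For example, on Elasticsearch 2.x the mapping would look like the line already present in your template:

"request" : { "type" : "string", "index" : "not_analyzed" }

On Elasticsearch 5.x and later, where string/not_analyzed was replaced by the keyword type, the equivalent (a minimal sketch) is:

"request" : { "type" : "keyword" }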
