简体   繁体   English

解析 XML Filebeat > Logstash > Elasticsearch

[英]Parsing XML Filebeat > Logstash > Elasticsearch

Goal: Parse an XML file with nested data into different elasticsearch documents.目标:将带有嵌套数据的 XML 文件解析为不同的 elasticsearch 文档。

I've chose to use logstash to help me here, but since the files will be on different servers I decided to use filebeat to serve these to logstash.我选择使用 logstash 来帮助我,但由于文件将位于不同的服务器上,因此我决定使用 filebeat 将这些文件提供给 logstash。 The setup seems sound.设置看起来很合理。

However, I can't seem to get filebeat to send all of the lines in one message, I'm getting them line by line:但是,我似乎无法让 filebeat 在一条消息中发送所有行,我正在逐行获取它们:

{
    "@timestamp" => 2017-10-15T20:30:11.825Z,
        "offset" => 44,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.825Z,
        "offset" => 108,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "<foo:statistics xsi:schemaLocation=\"http://www.foo.no foo.xsd\" ",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.825Z,
        "offset" => 141,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  xmlns:foo=\"http://www.foo.no\" ",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 198,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 231,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:version>1.0</foo:version>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 258,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:name>bar</foo:name>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 313,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:start>2017-01-01T00:06:34.880+02:00</foo:start>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 366,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:stop>2017-05-01T00:06:34.880+02:00</foo:stop>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 380,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:place>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 409,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:name>baz</foo:name>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 442,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:id>1B445T4UV-W</foo:id>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 457,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  </foo:place>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 471,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 526,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:date>2017-04-17T04:06:34.880+02:00</foo:date>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 557,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:status>2</foo:status>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 572,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  </foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 586,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 641,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:date>2017-04-18T04:06:34.880+02:00</foo:date>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 672,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:status>3</foo:status>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 687,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  </foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 701,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 756,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:date>2017-04-19T04:06:34.880+02:00</foo:date>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 787,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:status>1</foo:status>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
[2017-10-15T20:30:42,853][WARN ][logstash.filters.split   ] Only String and Array types are splittable. field:visits is of type = NilClass
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 802,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  </foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}

Here is my filebeat configuration这是我的 filebeat 配置

filebeat.prospectors:
- type: log
  paths:
    - /mnt/log/*.xml
  multiline.pattern: '<?xml .*'
  multiline.negate: false
  multiline.match: after

output.logstash:
  hosts: ["logstash:5000"]

and my XML file:和我的 XML 文件:

<?xml version="1.0" encoding="iso-8859-1"?>
<foo:statistics xsi:schemaLocation="http://www.foo.no foo.xsd" 
  xmlns:foo="http://www.foo.no" 
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <foo:version>1.0</foo:version>
  <foo:name>bar</foo:name>
  <foo:start>2017-01-01T00:06:34.880+02:00</foo:start>
  <foo:stop>2017-05-01T00:06:34.880+02:00</foo:stop>
  <foo:place>
    <foo:name>baz</foo:name>
    <foo:id>1B445T4UV-W</foo:id>
  </foo:place>
  <foo:visit>
    <foo:date>2017-04-17T04:06:34.880+02:00</foo:date>
    <foo:status>2</foo:status>
  </foo:visit>
  <foo:visit>
    <foo:date>2017-04-18T04:06:34.880+02:00</foo:date>
    <foo:status>3</foo:status>
  </foo:visit>
  <foo:visit>
    <foo:date>2017-04-19T04:06:34.880+02:00</foo:date>
    <foo:status>1</foo:status>
  </foo:visit>
</foo:statistics>

I want the whole file passed into logstash, here is the config for that.我想将整个文件传递给 logstash,这是它的配置。

input {
    beats {
        port => 5000
    }
}

filter {
    xml {
        namespaces => {
            "foo" => "http://www.foo.no"
            "xsi" => "http://www.w3.org/2001/XMLSchema-instance"
        }
        source => "message"
        store_xml => "false"

        xpath => ["/foo:statistics/foo:start/text()", "start"]
        xpath => ["/foo:statistics/foo:stop/text()", "stop"]
        xpath => ["/foo:statistics/foo:place/name/text()", "place_name"]
        xpath => ["/foo:statistics/foo:place/id/text()", "place_id"]
        xpath => ["/foo:statistics/foo:visit", "visits"]
    }

    split {
        field => "visits"
        remove_field => "message"
    }

    xml {
        source => "visits"
        store_xml => "false"
        xpath => ["/foo:visit/foo:date/text()", "date"]
        xpath => ["/foo:visit/foo:status/text()", "status"]
        remove_field => "visits"                
    }

    date {
        match => ["date", "ISO8601"]
    }
}

output {
    stdout { codec => rubydebug }
    elasticsearch {
        hosts => "elasticsearch:9200"
        index => "maaling-%{+YYYY.MM.dd}"
    }
}

Any help is massively appreciated.任何帮助都非常感谢。

EDIT: changed pattern to '编辑:将模式更改为 '

I changed my filebeat configuration to:我将我的 filebeat 配置更改为:

filebeat.prospectors:
- type: log
  paths:
    - /mnt/log/*.ATKSTAT
  encoding: 'windows-1252'
  multiline.pattern: ' \A.*'
  multiline.negate: true
  multiline.match: after

output.logstash:
  hosts: ["logstash:5000"]

This is v6.0.0 of the elk stack这是 elk 堆栈的 v6.0.0

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM