使用logstash將文檔嵌套到elasticsearch

Question

大家好我正在嘗試使用logstash將文檔從MSSQL服務器索引到elasticsearch。 我希望我的文檔作為嵌套文檔攝取，但我收到聚合異常錯誤

我把我所有的代碼都放在這里

Create table department(
ID Int identity(1,1) not null,
Name varchar(100)
)

Insert into department(Name)
Select 'IT Application development'

union all

Select 'HR & Marketing'

Create table Employee(
ID Int identity(1,1) not null,
emp_Name varchar(100),
dept_Id int
)

Insert into Employee(emp_Name,dept_Id)
Select 'Mohan',1
union all
Select 'parthi',1
union all
Select 'vignesh',1

Insert into Employee(emp_Name,dept_Id)
Select 'Suresh',2
union all
Select 'Jithesh',2
union all
Select 'Venkat',2

最終選擇語句

SELECT 
De.id AS id,De.name AS deptname,Emp.id AS empid,Emp.emp_name AS empname
FROM  department De LEFT JOIN employee Emp ON De.id = Emp.dept_Id
ORDER BY De.id

結果應該是這樣的

我的彈性搜索映射

PUT /departments
{
 "mappings": {
   "properties": {
     "id":{
       "type":"integer"
     },
     "deptname":{
       "type":"text"
     },
     "employee_details":{
       "type": "nested",
       "properties": {
         "empid":{
           "type":"integer"
         },
         "empname":{
           "type":"text"
         }
       }
     }
   }
 }
}

我的logstash配置文件

input {
jdbc {
jdbc_driver_library => ""
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://EC2AMAZ-J90JR4A\SQLEXPRESS:1433;databaseName=xxxx;"
jdbc_user => "xxxx"
jdbc_password => "xxxx"
statement => "SELECT 
De.id AS id,De.name AS deptname,Emp.id AS empid,Emp.emp_name AS empname
FROM  department De LEFT JOIN employee Emp ON De.id = Emp.dept_Id
ORDER BY De.id"
}
}
filter{
        aggregate {
        task_id => "%{id}"
        code => "
        map['id'] = event['id']
        map['deptname'] = event['deptname']
        map['employee_details'] ||= []
        map['employee_details'] << {'empId' => event['empid'], 'empname' => event['empname'] }
        "
        push_previous_map_as_event => true
        timeout => 5
        timeout_tags => ['aggregated']
        } 
    }
    output{
    stdout{ codec => rubydebug }
    elasticsearch{
            hosts => "https://d9bc7cbca5ec49ea96a6ea683f70caca.eastus2.azure.elastic-cloud.com:4567"
            user => "elastic"
            password => "****"
            index => "departments"
            action => "index"
            document_type => "departments"
            document_id => "%{id}"
           }

}

運行logstash時出現以下錯誤

彈性搜索截圖供參考

我的 elasticsearch 輸出應該是這樣的

{
  "took" : 398,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "departments",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "id" : 1,
          "deptname" : "IT Application development"
          "employee_details" : [
          {
          "empid" : 1,
          "empname" : "Mohan"
           },
          {
          "empid" : 2,
          "empname" : "Parthi"
          },
          {
          "empid" : 3,
          "empname" : "Vignesh"
           }
          ]
         }
      }
    ]
  }
}

有人可以幫我解決這個問題嗎？ 我希望所有員工的 empname 和 empid 都應該作為相應部門的嵌套文檔插入。 提前致謝

Answer 1

我使用 JDBC_STREAMING 代替聚合過濾器，它工作正常可能對看這篇文章的人有所幫助。

input {
jdbc {
jdbc_driver_library => "D:/Users/xxxx/Desktop/driver/mssql-jdbc-7.4.1.jre12-shaded.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://EC2AMAZ-J90JR4A\SQLEXPRESS:1433;databaseName=xxx;"
jdbc_user => "xxx"
jdbc_password => "xxxx"
statement => "Select Policyholdername,Age,Policynumber,Dob,Client_Address,is_active from policy"
}
}
filter{
jdbc_streaming {
jdbc_driver_library => "D:/Users/xxxx/Desktop/driver/mssql-jdbc-7.4.1.jre12-shaded.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "jdbc:sqlserver://EC2AMAZ-J90JR4A\SQLEXPRESS:1433;databaseName=xxxx;"
jdbc_user => "xxxx"
jdbc_password => "xxxx"
statement => "select claimnumber,claimtype,is_active from claim where policynumber = :policynumber"
parameters => {"policynumber" => "policynumber"}
target => "claim_details"
}
}
output {
elasticsearch {
hosts => "https://e5a4a4a4de7940d9b12674d62eac9762.eastus2.azure.elastic-cloud.com:9243"
user => "elastic"
password => "xxxx"
index => "xxxx"
action => "index"
document_type => "_doc"
document_id => "%{policynumber}"

}
stdout { codec => rubydebug }
}

Answer 2

您也可以嘗試在 logstash 過濾器插件中使用聚合。 檢查這個使用 Logstash 插入嵌套對象

https://xyzcoder.github.io/2020/07/29/indexing-documents-using-logstash-and-python.html

我只是顯示一個對象，但我們也可以有多個項目數組

input {
    jdbc {
           jdbc_driver_library => "/usr/share/logstash/javalib/mssql-jdbc-8.2.2.jre11.jar"
           jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
           jdbc_connection_string => "jdbc:sqlserver://host.docker.internal;database=StackOverflow2010;user=pavan;password=pavankumar@123"
           jdbc_user => "pavan"
           jdbc_password => "pavankumar@123"
           statement => "select top 500 p.Id as PostId,p.AcceptedAnswerId,p.AnswerCount,p.Body,u.Id as userid,u.DisplayName,u.Location
                        from StackOverflow2010.dbo.Posts p inner join StackOverflow2010.dbo.Users u
                        on p.OwnerUserId=u.Id"
        }
}

filter {
    aggregate {
        task_id => "%{postid}"
        code => "
            map['postid'] = event.get('postid')
            map['accepted_answer_id'] = event.get('acceptedanswerid')
            map['answer_count'] = event.get('answercount')
            map['body'] = event.get('body')
            map['user'] = {
                'id' => event.get('userid'),
                'displayname' => event.get('displayname'),
                'location' => event.get('location')
            }
        event.cancel()"
        push_previous_map_as_event => true
        timeout => 30
    }
}

output {
    elasticsearch {
        hosts => ["http://elasticsearch:9200", "http://elasticsearch:9200"]
        index => "stackoverflow_top"
    }
    stdout {
        codec => rubydebug
    }
}

所以在那個例子中，我有多種插入數據的方式，比如聚合、JDBC 流和其他場景

使用logstash將文檔嵌套到elasticsearch

問題描述

2 個解決方案

解決方案1
0 已采納 2020-02-04 10:20:44

解決方案2
0 2020-07-29 12:09:30

使用logstash將文檔嵌套到elasticsearch

問題描述

2 個解決方案

解決方案1 0 已采納 2020-02-04 10:20:44

解決方案2 0 2020-07-29 12:09:30

解決方案1
0 已采納 2020-02-04 10:20:44

解決方案2
0 2020-07-29 12:09:30