[英]Logstash is not migrating data to Elasticsearch
I have a bunch of CSV files that are needed to be migrated to Elastichsearch, I managed to use Logstash version 7.16.3, the index is already created on elastichsearch using the proper mapping.我有一堆需要迁移到 Elastichsearch 的 CSV 文件,我设法使用 Logstash 版本 7.16.3,索引已经使用正确的映射在 elastichsearch 上创建。 the configuration file is as below:配置文件如下:
input{
file{
path=> "C:/Users/fr-pa/Documents/wikidata/extracted/*.csv"
start_position => "beginning"
sincedb_path => "NULL"
} } filter{
csv{
separator => ","
columns =>["id", "type", "arlabel", "enlabel","araliases",
"enaliases","ardescription","endescription","maincategory",
"arwiki", "enwiki","arwikiquote", "enwikiquote"]
} } output{
elasticsearch{
hosts=> "http://localhost:9200/"
index => "wikidata_index"
}
stdout {
} }
But the data is not migrated, the output of the Logstash:但是数据没有迁移,Logstash的output:
Does anyone have an idea what is the problem?有谁知道问题出在哪里?
This is my index这是我的索引
request_body = {
"settings": {
"analysis": {
"filter": {
"arabic_stop": {
"type": "stop",
"stopwords": "_arabic_"
},
"arabic_keywords": {
"type": "keyword_marker",
"keywords": ["مثال"]
},
"arabic_stemmer": {
"type": "stemmer",
"language": "arabic"
},
"english_stop": {
"type": "stop",
"stopwords": "_english_"
},
"english_keywords": {
"type": "keyword_marker",
"keywords": ["example"]
},
"english_stemmer": {
"type": "stemmer",
"language": "english"
},
"english_possessive_stemmer": {
"type": "stemmer",
"language": "possessive_english"
}
},
"analyzer": {
"rebuilt_arabic": {
"tokenizer": "standard",
"filter": [
"lowercase",
"decimal_digit",
"arabic_stop",
"arabic_normalization",
"arabic_keywords",
"arabic_stemmer"
]
},
"comma_split":{
"type" : "pattern",
"pattern" : ","
},
"rebuilt_english": {
"tokenizer": "standard",
"filter": [
"english_possessive_stemmer",
"lowercase",
"english_stop",
"english_keywords",
"english_stemmer"
]
}
}
}
} ,
"mappings": {
"properties": {
"id": {
"type": "keyword",
"ignore_above": 256
},
"type": {
"type": "text",
"analyzer": "comma_split"
},
"arlabel": {
"type": "text",
"analyzer": "rebuilt_arabic"
},
"enlabel": {
"type": "text",
"analyzer": "rebuilt_english"
},
"araliases": {
"type": "text",
"analyzer": "comma_split"
},
"enaliases": {
"type": "text",
"analyzer": "comma_split"
},
"ardescription":{
"type": "text",
"analyzer": "rebuilt_arabic"
},
"endescription":{
"type": "text",
"analyzer": "rebuilt_english"
},
"maincategory":{
"type": "text",
"analyzer": "comma_split"
},
"arwiki":{
"type": "text",
"analyzer": "rebuilt_arabic"
},
"enwiki":{
"type": "text",
"analyzer": "rebuilt_english"
},
"arwikiquote":{
"type": "text",
"analyzer": "rebuilt_arabic"
},
"enwikiquote": {
"type": "text",
"analyzer": "rebuilt_english"
}
}
}
}
please note that there are some fields that have empty values, I tried to use the python Bulk helper class to insert the data:请注意,有些字段有空值,我尝试使用 python Bulk helper class 插入数据:
with open(full_path,encoding="utf8") as f:
reader = csv.DictReader(f)
print(reader)
helpers.bulk(es, reader, index='wikidata_index')
The error raised is:引发的错误是:
C:\Users\fr-pa\Documents\wikidata\extracted\till_Q10091689_item.csv
<csv.DictReader object at 0x0000028E86C47EB0>
---------------------------------------------------------------------------
BulkIndexError Traceback (most recent call last)
<ipython-input-42-3849641bd8f9> in <module>
5 reader = csv.DictReader(f)
6 print(reader)
----> 7 helpers.bulk(es, reader, index='wikidata_index')
C:\ProgramData\Anaconda3\lib\site-packages\elasticsearch\helpers\actions.py in bulk(client, actions, stats_only, ignore_status, *args, **kwargs)
408 # make streaming_bulk yield successful results so we can count them
409 kwargs["yield_ok"] = True
--> 410 for ok, item in streaming_bulk(
411 client, actions, ignore_status=ignore_status, *args, **kwargs
412 ):
C:\ProgramData\Anaconda3\lib\site-packages\elasticsearch\helpers\actions.py in streaming_bulk(client, actions, chunk_size, max_chunk_bytes, raise_on_error, expand_action_callback, raise_on_exception, max_retries, initial_backoff, max_backoff, yield_ok, ignore_status, *args, **kwargs)
327
328 try:
--> 329 for data, (ok, info) in zip(
330 bulk_data,
331 _process_bulk_chunk(
C:\ProgramData\Anaconda3\lib\site-packages\elasticsearch\helpers\actions.py in _process_bulk_chunk(client, bulk_actions, bulk_data, raise_on_exception, raise_on_error, ignore_status, *args, **kwargs)
254 raise_on_error=raise_on_error,
255 )
--> 256 for item in gen:
257 yield item
258
C:\ProgramData\Anaconda3\lib\site-packages\elasticsearch\helpers\actions.py in _process_bulk_chunk_success(resp, bulk_data, ignore_status, raise_on_error)
185
186 if errors:
--> 187 raise BulkIndexError("%i document(s) failed to index." % len(errors), errors)
188
189
BulkIndexError: ('500 document(s) failed to index.', [{'index': {'_index': 'wikidata_index', '_type': '_doc', '_id': 'dbxzon4BOVq7OZfct2-t', 'status': 400, 'error': {'type': 'mapper_parsing_exception', 'reason': 'failed to parse', 'caused_by': {'type': 'illegal_argument_exception', 'reason': 'field name cannot be an empty string'}},
The problem is evident from the last line of the stacktrace:从堆栈跟踪的最后一行可以明显看出问题:
BulkIndexError: ('500 document(s) failed to index.', [{'index': {'_index': 'wikidata_index', '_type': '_doc', '_id': 'dbxzon4BOVq7OZfct2-t', 'status': 400, 'error': {'type': 'mapper_parsing_exception', 'reason': 'failed to parse', 'caused_by': {'type': 'illegal_argument_exception', 'reason': 'field name cannot be an empty string'} }, BulkIndexError: ('500 个文档未能索引。', [{'index': {'_index': 'wikidata_index', '_type': '_doc', '_id': 'dbxzon4BOVq7OZfct2-t', 'status ': 400, 'error': {'type': 'mapper_parsing_exception', 'reason': '解析失败', 'caused_by': {'type': 'illegal_argument_exception', 'reason': '字段名不能是空字符串'} },
You need to remove or replace the fields with empty keys, for example in a way like this one .您需要使用空键删除或替换字段,例如以这样的方式。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.