[英]Slow elasticsearch query with french addresses with frequent words
由于公司的政策,我将我的项目从 Solr 迁移到了 Elasticsearch。
我在 Solr 上的请求速度很快,但在 Elasticsearch 上却很慢;由于系统受 CPU 限制且请求太慢,我的服务无法承受我所期望的每秒请求数负载。
我的索引包含所有法国街道地址:
我们的集群由 6 台服务器(32 GB RAM / 8 CPU)组成,索引有 3 个主分片和 1 个副本。
我们已经应用了所有 elastic.co 的建议“tune-for-search-speed”
java的配置是这样的:
root@ELK001:~# ps aux | grep java
elastic+ 45085 170 61.9 31052544 20460004 ? SLsl 03:18 860:27 /bin/java -Xms16g -Xmx16g -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -Des.networkaddress.cache.ttl=60 -Des.networkaddress.cache.negative.ttl=10 -XX:+AlwaysPreTouch -Xss1m -Djava.awt.headless=true -Dfile.encoding=UTF-8 -Djna.nosys=true -XX:-OmitStackTraceInFastThrow -Dio.netty.noUnsafe=true -Dio.netty.noKeySetOptimization=true -Dio.netty.recycler.maxCapacityPerThread=0 -Dlog4j.shutdownHookEnabled=false -Dlog4j2.disable.jmx=true -Djava.io.tmpdir=/tmp/elasticsearch-3573362637254362332 -XX:ErrorFile=/var/log/elasticsearch/hs_err_pid%p.log -Des.path.home=/usr/share/elasticsearch -Des.path.conf=/etc/elasticsearch -Des.distribution.flavor=default -Des.distribution.type=rpm -cp /usr/share/elasticsearch/lib/* org.elasticsearch.bootstrap.Elasticsearch -p /var/run/elasticsearch/elasticsearch.pid
最慢的查询是那些使用了索引中出现频率非常高的词项的查询,例如“1”或“rue”(法语中的“街”)。“rue”在 25,462,993 条记录中出现了 13,287,097 次。
慢查询示例
{
"from": 0,
"size": 30,
"query": {
"query_string": {
"query": "(querystring:/1.*/ )AND( querystring:/rue.*/ )AND( querystring:/du.*/ )AND( querystring:/parad.*/)",
"fields": [],
"type": "best_fields",
"default_operator": "and",
"max_determinized_states": 10000,
"phrase_slop": 0,
"escape": false,
"auto_generate_synonyms_phrase_query": true,
"boost": 1.0
}
},
"version": true,
"track_scores": true,
"highlight": {
"pre_tags": ["<em>"],
"post_tags": ["</em>"],
"require_field_match": false,
"fields": {
"ligne1": {},
"ligne2": {},
"ligne3": {},
"numero": {},
"ext_courte": {},
"ext_longue": {},
"libelle_voie": {},
"libelle_voie_syn": {},
"libelle_ligne_5": {},
"libelle_acheminement_cedex": {},
"libelle_acheminement": {},
"code_postal": {},
"code_cedex": {},
"libelle_pays": {},
"libelle_pays_syn": {}
}
}
}
当我使用性能分析器(profiler)时,可以看到时间主要花在索引中最常见的关键词上:1(门牌号)、rue(法语中的“街”)、du(法语中的“的”),并且主要耗费在“build_scorer”阶段。
类型 自身时间(Self time) 总时间(Total time) 时间占比(% Time)
BooleanQuery +querystring:/parad.*/ +querystring... 0.5ms 116.6ms 99.25%
耗时明细(Timing breakdown)
索引设置:
{
"settings": {
"index": {
"number_of_shards": "3",
"provided_name": "s7_sint_profic_index2",
"creation_date": "1619449230582",
"requests": {
"cache": {
"enable": "false"
}
},
"analysis": {
"filter": {
"legacy_synonym_filter": {
"type": "synonym",
"synonyms": [
"bd => boulevard",
"fg => faubourg",
"saint,st",
"sainte,ste",
"I ,1,un",
"II,2,deux",
"III,3,trois",
"IV,4,quatre",
"V,5,cinq",
"VI,6,six",
"VII,7,sept",
"VIII,8,huit",
"IX,9,neuf",
"X,10,dix",
"XI,11,onze",
"XII,12,douze",
"XIII,13,treize",
"XIV,14,quatorze",
"XV,15,quinze",
"XVI,16,seize",
"XX,20,vingt"
]
},
"serca_stop": {
"type": "stop",
"stopwords": [
"le",
"la",
"du",
"de",
"des",
"au",
"et",
"l",
"a",
"d",
"sous",
"sur"
]
}
},
"analyzer": {
"default": {
"filter": [
"serca_stop",
"lowercase",
"legacy_synonym_filter"
],
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1",
"uuid": "9KpNllG6TSi-AxLaB4ETyQ",
"version": {
"created": "6080099"
}
}
},
"defaults": {
"index": {
"max_inner_result_window": "100",
"unassigned": {
"node_left": {
"delayed_timeout": "1m"
}
},
"max_terms_count": "65536",
"lifecycle": {
"name": "",
"rollover_alias": "",
"indexing_complete": "false"
},
"routing_partition_size": "1",
"max_docvalue_fields_search": "100",
"merge": {
"scheduler": {
"max_thread_count": "4",
"auto_throttle": "true",
"max_merge_count": "9"
},
"policy": {
"reclaim_deletes_weight": "2.0",
"floor_segment": "2mb",
"max_merge_at_once_explicit": "30",
"max_merge_at_once": "10",
"max_merged_segment": "5gb",
"expunge_deletes_allowed": "10.0",
"segments_per_tier": "10.0",
"deletes_pct_allowed": "33.0"
}
},
"max_refresh_listeners": "1000",
"max_regex_length": "1000",
"load_fixed_bitset_filters_eagerly": "true",
"number_of_routing_shards": "5",
"write": {
"wait_for_active_shards": "1"
},
"mapping": {
"coerce": "false",
"nested_fields": {
"limit": "50"
},
"depth": {
"limit": "20"
},
"ignore_malformed": "false",
"total_fields": {
"limit": "1000"
}
},
"source_only": "false",
"soft_deletes": {
"enabled": "false",
"retention": {
"operations": "0"
},
"retention_lease": {
"period": "12h"
}
},
"max_script_fields": "32",
"query": {
"default_field": [
"*"
],
"parse": {
"allow_unmapped_fields": "true"
}
},
"format": "0",
"frozen": "false",
"sort": {
"missing": [],
"mode": [],
"field": [],
"order": []
},
"priority": "1",
"codec": "default",
"max_rescore_window": "10000",
"max_adjacency_matrix_filters": "100",
"gc_deletes": "60s",
"optimize_auto_generated_id": "true",
"max_ngram_diff": "1",
"translog": {
"generation_threshold_size": "64mb",
"flush_threshold_size": "512mb",
"sync_interval": "5s",
"retention": {
"size": "512mb",
"age": "12h"
},
"durability": "REQUEST"
},
"auto_expand_replicas": "false",
"mapper": {
"dynamic": "true"
},
"data_path": "",
"highlight": {
"max_analyzed_offset": "-1"
},
"routing": {
"rebalance": {
"enable": "all"
},
"allocation": {
"enable": "all",
"total_shards_per_node": "-1"
}
},
"search": {
"slowlog": {
"level": "TRACE",
"threshold": {
"fetch": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
},
"query": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
}
}
},
"throttled": "false"
},
"fielddata": {
"cache": "node"
},
"default_pipeline": "_none",
"max_slices_per_scroll": "1024",
"shard": {
"check_on_startup": "false"
},
"xpack": {
"watcher": {
"template": {
"version": ""
}
},
"version": "",
"ccr": {
"following_index": "false"
}
},
"percolator": {
"map_unmapped_fields_as_text": "false",
"map_unmapped_fields_as_string": "false"
},
"allocation": {
"max_retries": "5"
},
"refresh_interval": "1s",
"indexing": {
"slowlog": {
"reformat": "true",
"threshold": {
"index": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
}
},
"source": "1000",
"level": "TRACE"
}
},
"compound_format": "0.1",
"blocks": {
"metadata": "false",
"read": "false",
"read_only_allow_delete": "false",
"read_only": "false",
"write": "false"
},
"max_result_window": "10000",
"store": {
"stats_refresh_interval": "10s",
"type": "",
"fs": {
"fs_lock": "native"
},
"preload": []
},
"queries": {
"cache": {
"enabled": "true"
}
},
"ttl": {
"disable_purge": "false"
},
"warmer": {
"enabled": "true"
},
"max_shingle_diff": "3",
"query_string": {
"lenient": "false"
}
}
}
}
索引映射:
{
"mapping": {
"proficDocument": {
"properties": {
"cea": {
"type": "keyword"
},
"querystring": {
"type": "text"
},
"querystring_ligne4": {
"type": "text"
},
"querystring_ligne6": {
"type": "text"
},
"code_afnor": {
"type": "keyword",
"index": false
},
"code_cedex": {
"type": "keyword",
"copy_to": [
"querystring"
]
},
"code_insee": {
"type": "keyword"
},
"code_insee_ancienne_commune": {
"type": "keyword"
},
"code_postal": {
"type": "keyword"
},
"commentaires": {
"type": "text"
},
"coordonnees": {
"type": "geo_point"
},
"dateExport": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dateRef": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"desc_voie": {
"type": "text",
"index": false
},
"desc_voie_syn": {
"type": "text",
"index": false
},
"ext_courte": {
"type": "keyword",
"copy_to": [
"querystring"
]
},
"ext_longue": {
"type": "keyword",
"copy_to": [
"querystring",
"querystring_ligne4"
]
},
"id": {
"type": "alias",
"path": "_id"
},
"idza": {
"type": "keyword",
"index": false
},
"libelle_acheminement": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring",
"querystring_ligne6",
"libelle_acheminement_str"
]
},
"libelle_acheminement_cedex": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_acheminement_str": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"libelle_commune": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_ligne_5": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_pays": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_pays_syn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"libelle_projection": {
"type": "keyword",
"index": false
},
"libelle_raison": {
"type": "keyword",
"index": false
},
"libelle_voie": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring",
"querystring_ligne4"
]
},
"libelle_voie_syn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"ligne1": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"ligne2": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"ligne3": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"querystring"
]
},
"matvoie": {
"type": "keyword",
"index": false
},
"matvoie_syn": {
"type": "keyword",
"index": false
},
"mention_speciale": {
"type": "text"
},
"mot_directeur": {
"type": "keyword",
"index": false
},
"mot_directeur_syn": {
"type": "keyword",
"index": false
},
"new_cea": {
"type": "keyword"
},
"numero": {
"type": "integer",
"copy_to": [
"querystring",
"querystring_ligne4",
"numero_str"
]
},
"numero_mention_speciale": {
"type": "text"
},
"numero_str": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"type_pays": {
"type": "keyword",
"index": false
},
"type_projection": {
"type": "integer",
"index": false
},
"type_raison": {
"type": "keyword",
"index": false
},
"type_synonyme": {
"type": "keyword",
"index": false
},
"type_voie": {
"type": "keyword",
"index": false
},
"type_voie_syn": {
"type": "keyword",
"index": false
},
"x": {
"type": "float"
},
"y": {
"type": "float"
}
}
}
}
}
我们打算从 ELK 6 迁移到 ELK 7,以尝试解决这个问题。
您还有其他建议吗?
正如 Val 已经说过的,您的查询看起来效率低下。 为什么不使用简单的匹配查询并检查它如何影响您的性能?
GET yourindex/_search
{
"query": {
"match": {
"querystring": {
"query": "1 rue du parad"
}
}
}
}
但是有两件事我不确定。
关于第 1 点,我不了解你的项目,因此不知道你的搜索数据来自哪里。不过,针对特定字段进行搜索,并用布尔(bool)查询把这些搜索组合起来,或许会更好?
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.