简体   繁体   中英

Elasticsearch can't find hunspell

I'm running elasticsearch from a docker-compose.yml file:

# Compose stack: MongoDB, Elasticsearch 6.5.4 and Kibana 6.5.4,
# all attached to the cust-mycom bridge network.
version: '3'

services:
  mongo:
    image: mongo
    container_name: mongo-cust-mycom
    networks:
      - cust-mycom
    ports:
      - "27017:27017"
    volumes:
      # Named volume holding the database files.
      - cust-mycom-mongo:/data/db

  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:6.5.4
    container_name: elasticsearch-cust-mycom
    restart: unless-stopped
    networks:
      - cust-mycom
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      - cluster.name=i3-elasticsearch
      # All listed X-Pack features are switched off.
      - xpack.security.enabled=false
      - xpack.monitoring.enabled=false
      - xpack.ml.enabled=false
      - xpack.graph.enabled=false
      - xpack.watcher.enabled=false
    volumes:
      # Named volume for the index data.
      - cust-mycom-elastic:/usr/share/elasticsearch/data
      # Host hunspell dictionaries bind-mounted into the container's
      # config/hunspell directory.
      - ./cust/config/elasticsearch/config/hunspell:/usr/share/elasticsearch/config/hunspell

  kibana:
    image: docker.elastic.co/kibana/kibana:6.5.4
    container_name: kibana-cust-mycom
    restart: unless-stopped
    depends_on:
      - elasticsearch
    networks:
      - cust-mycom
    ports:
      - "5601:5601"


networks:
  cust-mycom:
    driver: bridge

volumes:
  cust-mycom-mongo:
  cust-mycom-elastic:

I start the stack with docker-compose up -d.

When I try to create my desired index, using the following json:

{
    "settings": {
        "number_of_shards": 3,
        "number_of_replicas": 2,
        "analysis": {
            "filter": {
                "swedish_stemmer": {
                    "type": "hunspell",
                    "language": "sv_SE"
                },
                "ins_pattern": {
                    "type": "pattern_capture",
                    "patterns": [
                        "([a-zåäö]*)(prod)"
                    ]
                },
                "cust_stopwords": {
                    "type":       "stop",
                    "stopwords":  [ "en", "ett", "det", "den" ]
                }
            },
            "analyzer": {
                "swedish_index": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "ins_pattern",
                        "swedish_stemmer"
                    ]
                },
                "swedish_query": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "swedish_stemmer",
                        "cust_stopwords"
                    ]
                }
            }
        }
    },
    "mappings": {
        "default": {
            "properties": {
                "keywords": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query"
                },
                "audience": {
                    "type": "keyword"
                },
                "contentExcerpt": {
                    "type": "text"
                },
                "date": {
                    "type": "date",
                    "store": true,
                    "format": "dateOptionalTime"
                },
                "validUntil": {
                    "type": "date",
                    "store": true,
                    "format": "dateOptionalTime"
                },
                "informationType": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query"
                },
                "mainContentOfPage": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query",
                    "term_vector": "with_positions_offsets"
                },
                "thumbnailUrl": {
                    "type": "keyword",
                    "store": true,
                    "norms": false
                },
                "title": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query"
                },
                "url": {
                    "type": "keyword",
                    "store": true,
                    "norms": false
                },
                "tags": {
                    "type": "text",
                    "store": true,
                    "norms": false,
                    "analyzer": "swedish_index",
                    "search_analyzer": "swedish_query"
                }
            }
        }
    }
}

and the following script:

#!/bin/bash

# Recreates the "main" index from json/custse.index.json and then
# sets its number_of_replicas to 0.
es_index_url='http://localhost:9200/main'

# Send a DELETE for the index before creating it again.
curl -XDELETE "${es_index_url}"

# Create the index; the request body is read from the JSON file.
curl -XPUT -H "Content-type: application/json" -d @json/custse.index.json "${es_index_url}"

# Update the replica count on the index just created.
curl -XPUT "${es_index_url}/_settings" -H "Content-Type: application/json" -d '{
    "index" : {
        "number_of_replicas" : 0
    }
}'

I get the following error message:

{"error":{"root_cause":[{"type":"illegal_state_exception","reason":"failed to load hunspell dictionary for locale: sv_SE"}]

I've tried putting my hunspell dictionaries in /usr/share/elasticsearch/config/hunspell/, /usr/share/elasticsearch/hunspell, /etc/elasticsearch/hunspell and /etc/elasticsearch/config/hunspell. Elasticsearch can't find them in any of these locations.

Here are the contents of the hunspell directory:

/etc/elasticsearch$ ls hunspell
sv_SE

/etc/elasticsearch$ ls hunspell/sv_SE/
cust.dic  README_sv_SE.txt  sv_SE.aff  sv_SE.dic

How can I make elasticsearch find the hunspell dictionaries?

I confirm that this configuration works:

# One Elasticsearch 6.8.13 service with the host hunspell directory
# bind-mounted into the container's config/hunspell directory.
version: '3.4'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:6.8.13
    container_name: elasticsearch6
    restart: always
    environment:
      # JVM heap: initial and maximum size both 256 MB.
      - 'ES_JAVA_OPTS=-Xms256m -Xmx256m'
    ports:
      # Bound to the host loopback address.
      - '127.0.0.1:9200:9200'
    volumes:
      - 'es_data:/usr/share/elasticsearch/data'
      - './elasticsearch/hunspell:/usr/share/elasticsearch/config/hunspell'

volumes:
  es_data:

The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM