简体   繁体   中英

Solr Better search result with adjacent query keyword

I have configured Solr for my e-commerce application (which mostly contains book data). The search results do not seem to return what I expect.

Following is the configuration.

schema.xml `

    <!-- Explicitly declared fields. Only id, productId and skuId are stored
         (stored="true"); the remaining fields are indexed but not stored.
         "id" is the field named by <uniqueKey>. category and explicitCategory
         are multiValued. -->
    <field name="namespace" type="string" indexed="true" stored="false" />
    <field name="id" type="string" indexed="true" stored="true" />
    <field name="productId" type="long" indexed="true" stored="true" />
    <field name="skuId" type="long" indexed="true" stored="true" />
    <field name="category" type="long" indexed="true" stored="false" multiValued="true" />
    <field name="explicitCategory" type="long" indexed="true" stored="false" multiValued="true" />
    <field name="searchable" type="text_general" indexed="true" stored="false" />

    <!-- Dynamic fields: the field-name suffix selects the field type. Every rule
         below is indexed="true" stored="false". For the int, string, long,
         boolean, double and date rules, appending "s" to the suffix gives the
         multiValued variant (_is, _ss, _ls, _bs, _ds, _dts). For text_general,
         *_t and *_searchable are single-valued and *_txt is multiValued. -->
    <dynamicField name="*_searchable" type="text_general" indexed="true" stored="false" />
    <dynamicField name="*_i" type="int" indexed="true" stored="false" />
    <dynamicField name="*_is" type="int" indexed="true" stored="false" multiValued="true" />
    <dynamicField name="*_s" type="string" indexed="true" stored="false" />
    <dynamicField name="*_ss" type="string" indexed="true" stored="false" multiValued="true" />
    <dynamicField name="*_l" type="long" indexed="true" stored="false" />
    <dynamicField name="*_ls" type="long" indexed="true" stored="false" multiValued="true" />
    <dynamicField name="*_t" type="text_general" indexed="true" stored="false" />
    <dynamicField name="*_txt" type="text_general" indexed="true" stored="false" multiValued="true" />
    <dynamicField name="*_b" type="boolean" indexed="true" stored="false" />
    <dynamicField name="*_bs" type="boolean" indexed="true" stored="false" multiValued="true" />
    <dynamicField name="*_d" type="double" indexed="true" stored="false" />
    <dynamicField name="*_ds" type="double" indexed="true" stored="false" multiValued="true" />
    <!-- *_p uses the same double type as *_d. -->
    <dynamicField name="*_p" type="double" indexed="true" stored="false" />

    <!-- Date-valued dynamic fields (single and multiValued). -->
    <dynamicField name="*_dt" type="date" indexed="true" stored="false" />
    <dynamicField name="*_dts" type="date" indexed="true" stored="false" multiValued="true" />

    <!-- some trie-coded dynamic fields for faster range queries -->
    <dynamicField name="*_ti" type="tint" indexed="true" stored="false" />
    <dynamicField name="*_tl" type="tlong" indexed="true" stored="false" />
    <dynamicField name="*_td" type="tdouble" indexed="true" stored="false" />
    <dynamicField name="*_tdt" type="tdate" indexed="true" stored="false" />

    <!-- Both field types required for geolocation searches. First stores the
        lat and lon components for the "coordinate" FieldType. Second stores
        the coordinate. -->
    <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
    <dynamicField name="*_c"  type="coordinate" indexed="true" stored="false"/> 
</fields>

<uniqueKey>id</uniqueKey>

<types>
    <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />

    <!-- boolean type: "true" or "false" -->
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" />

    <!-- Default numeric field types. For faster range queries, consider the 
        tint/tlong/tdouble types. -->
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" />
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" />
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" />

    <!-- Numeric field types that index each value at various levels of precision 
        to accelerate range queries when the number of values between the range endpoints 
        is large. See the javadoc for NumericRangeQuery for internal implementation 
        details. Smaller precisionStep values (specified in bits) will lead to more 
        tokens indexed per value, slightly larger index size, and faster range queries. 
        A precisionStep of 0 disables indexing at different precision levels. -->
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" />
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" />
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" />

    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z,
        and is a more restricted form of the canonical representation of dateTime
        (http://www.w3.org/TR/xmlschema-2/#dateTime). The trailing "Z" designates
        UTC time and is mandatory. Optional fractional seconds are allowed:
        1995-12-31T23:59:59.999Z. All other components are mandatory.

        Expressions can also be used to denote calculations that should be
        performed relative to "NOW" to determine the value, e.g.:
            NOW/HOUR                 Round to the start of the current hour
            NOW-1DAY                 Exactly 1 day prior to now
            NOW/DAY+6MONTHS+3DAYS    6 months and 3 days in the future from the
                                     start of the current day

        Consult the DateField javadocs for more information.
        Note: For faster range queries, consider the tdate type. -->
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" />

    <!-- A Trie based date field for faster date range queries and date faceting
        (precisionStep="6" instead of the 0 used by the plain date type). -->
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" />

    <!-- A general text field that has reasonable, generic cross-language defaults: 
        it tokenizes with StandardTokenizer and down cases. -->
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
        <!-- Single shared analysis chain: an <analyzer> without a "type"
             attribute is used for both indexing and querying. Text is split
             by StandardTokenizer and then lower-cased. -->
        <analyzer>
            <tokenizer class="solr.StandardTokenizerFactory"/>
            <filter class="solr.LowerCaseFilterFactory"/>
        </analyzer>
    </fieldType>

    <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
    <fieldType name="coordinate" class="solr.LatLonType" subFieldSuffix="_coordinate"/>

</types>

`

solrconfig.xml

    <?xml version="1.0" encoding="UTF-8" ?>
<config>
<luceneMatchVersion>4.10.3</luceneMatchVersion>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}" />
<updateHandler class="solr.DirectUpdateHandler2" />

<!-- Query-side settings: boolean clause limit, caches, result windowing and
     searcher warming. -->
<query>
    <!-- Upper bound on the number of clauses in a single boolean query. -->
    <maxBooleanClauses>1024</maxBooleanClauses>

    <!-- The three main caches hold 512 entries each and do not autowarm
         (autowarmCount="0"). The custom "perSegFilter" cache is much smaller
         and uses NoOpRegenerator. -->
    <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0" />
    <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0" />
    <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0" />
    <cache name="perSegFilter" class="solr.search.LRUCache" size="10" initialSize="0" autowarmCount="10" 
            regenerator="solr.NoOpRegenerator" />

    <enableLazyFieldLoading>true</enableLazyFieldLoading>

    <queryResultWindowSize>20</queryResultWindowSize>
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

    <!-- Warming listeners. The newSearcher listener is registered without any
         queries; the firstSearcher listener sends the single query below, whose
         "q" is literal placeholder text.
         NOTE(review): consider replacing it with a representative real query. -->
    <listener event="newSearcher" class="solr.QuerySenderListener" />
    <listener event="firstSearcher" class="solr.QuerySenderListener">
        <arr name="queries">
            <lst>
                <str name="q">static firstSearcher warming in solrconfig.xml</str>
            </lst>
        </arr>
    </listener>

    <useColdSearcher>false</useColdSearcher>
    <maxWarmingSearchers>2</maxWarmingSearchers>
</query>

<!-- HTTP request dispatching. Multipart uploads are capped at 2048000 KB
     (roughly 2 GB) while form-data bodies are capped at 2048 KB; never304="true"
     is set on httpCaching.
     NOTE(review): enableRemoteStreaming="true" is set here. Remote streaming
     lets a request point Solr at content via stream.url / stream.file; confirm
     this instance is not reachable by untrusted clients, or turn it off. -->
<requestDispatcher handleSelect="false">
    <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000" formdataUploadLimitInKB="2048"
                addHttpRequestToContext="false"/>
    <httpCaching never304="true" />
</requestDispatcher>

<!-- Main search endpoint. The values under "defaults" apply only when the
     request does not supply the parameter itself:
       echoParams = explicit : echo back only the parameters sent by the client
       rows       = 10       : page size (the name must be exactly "rows";
                               it was previously misspelled "rowsa", which is
                               not a Solr parameter)
       df         = name_t   : default field searched for unqualified terms -->
<requestHandler name="/select" class="solr.SearchHandler">
    <lst name="defaults">
        <str name="echoParams">explicit</str>
        <int name="rows">10</int>
        <str name="df">name_t</str>
    </lst>
</requestHandler>

<!-- JSON response writer. The content-type is set to text/plain (UTF-8)
     rather than a JSON media type. -->
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
    <str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>

For example, when I search for 2 states, it gives me a lot of random results that do not even contain "2 states" in the title.

在此处输入图片说明

However, when I search for 2 states as a phrase ("2 States"), I do get the relevant results.

在此处输入图片说明

I don't want to force every search into quotes, since a user might search for some combination like "book by author", which would certainly give 0 results as a phrase search because it won't match the exact phrase.

How can I improve my search so that the most relevant results are listed at the top?

You can use the pf2 and pf3 parameters in the edismax handler to give boosts to documents where two ( pf2 ) or three ( pf3 ) of your terms are found after each other in the field.

defType=edismax&pf2=title^4

You also have the pf argument for the regular dismax handler, but that's built on the assumption that all the terms are close together. It might help, but pf2 or pf3 sounds better suited for what you need.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM