[英]Java Lucene 4.5 how to search by case insensitive
我們已經實現了Java Lucene搜索引擎4.5,我試圖搜索內容,即使字段值不區分大小寫(例如,如果我搜索名為“Banglore”的城市我得到一個結果,但是當我搜索一個名字的城市時“banglore”我得到0結果)。
我已經使用StandardAnalyzer
來分析數據,使用WildcardQuery
來匹配Like
條件(我嘗試過這里沒有成功)。
我不知道我哪里出錯了。 我很感激有關修復此區分大小寫問題的任何指導。
public SearchHelper
{
Analyzer analyzer;
Directory index;
public IndexSearcher searcher = null;
public IndexWriter indexWriter = null;
public QueryParser parser = null;
private static int hitsPerPage = 100;
/**
* @param indexFileLocation
* @throws IOException
*/
public SearchHelper(String indexFileLocation) throws IOException
{
// this.analyzer =new StandardAnalyzer();
this.analyzer = new CaseStandardAnalyzer();
// analyzer = new ThaiAnalyzer();
this.index = FSDirectory.open(java.nio.file.Paths.get(indexFileLocation));
}
/**
* @param create
* @return
* @throws IOException
*/
public IndexWriter getIndexWriter(boolean create) throws IOException
{
if (indexWriter == null)
{
IndexWriterConfig iwc = new IndexWriterConfig(this.analyzer);
this.indexWriter = new IndexWriter(this.index, iwc);
}
return this.indexWriter;
} //End of getIndexWriter
/**
* @throws IOException
*/
public void closeIndexWriter() throws IOException
{
if (this.indexWriter != null)
{
this.indexWriter.commit();//optimize(); LUCENE_36
this.indexWriter.close();
}
} //End closeIndexWriter
/**
* @param indexFileLocation
* @throws CorruptIndexException
* @throws IOException
*/
public void startSearch(String indexFileLocation) throws CorruptIndexException, IOException
{
// searcher = new IndexSearcher(FSDirectory.open(new File(indexFileLocation)));
IndexReader reader = DirectoryReader.open(FSDirectory.open(java.nio.file.Paths.get(indexFileLocation)));
// IndexReader.open(this.index);
// open(getIndexWriter(true), true);
this.searcher = new IndexSearcher(reader);
}
/**
* @param fieldNames
* @param fieldValues
* @return
* @throws IOException
* @throws ParseException
*
* <p></p>
* https://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser
*/
public ScoreDoc[] searchSEO(String[] fieldNames, String[] fieldValues, int limitSize) throws IOException, ParseException
{
this.analyzer = new StandardAnalyzer();
int searchFieldSize = (null == fieldNames) ? 0 : fieldNames.length;
BooleanQuery booleanQuery = new BooleanQuery();
for (int i = 0; i < searchFieldSize; i++)
{
Query query1 = searchIndexWithWildcardQuery(fieldNames[i], fieldValues[i]);
addQueries(booleanQuery, query1, 2);
}
TopScoreDocCollector collector = null; // Or use by default hitsPerPage instead limitSize
if (limitSize > 0)
{
collector = TopScoreDocCollector.create(limitSize);
} else {
collector = TopScoreDocCollector.create(hitsPerPage);
}
this.searcher.search(booleanQuery,collector);
return collector.topDocs().scoreDocs;
}
/**
* @param whichField
* @param searchString
* @return
* @throws IOException
* @throws ParseException
*/
public Query searchIndexWithWildcardQuery(String whichField, String searchString) throws IOException, ParseException
{
Term term = addTerm(whichField, "*" + searchString + "*");
Query query = new WildcardQuery(term);
return query;
}
/**
* @param whichField
* @param searchString
* @return
*/
public Term addTerm(String whichField, String searchString)
{
Term term = new Term(whichField, searchString);
return term;
}
/**
* @param searchString
* @param operation
* @return
* @throws ParseException
*/
public Query addConditionOpertaion(String searchString, String operation) throws ParseException
{
Query query = null;
if ("and".equals(operation))
{
parser.setDefaultOperator(QueryParser.AND_OPERATOR);
} else if("or".equals(operation)) {
parser.setDefaultOperator(QueryParser.AND_OPERATOR);
}
query = parser.parse(searchString);
return query;
}
/**
* @param booleanQuery <code>BooleanQuery</code>
* @param q <code>Query</code>
* @param type <code>int</code> , 1--> Must, 2-->Should, 3 --> Must Not
*/
public void addQueries(BooleanQuery booleanQuery, Query q, int type)
{
switch(type)
{
case 1: booleanQuery.add(q, Occur.MUST);
break;
case 2: booleanQuery.add(q, Occur.SHOULD);
break;
default:booleanQuery.add(q, Occur.MUST_NOT);
break;
} //End of switch
}
public QueryParser getParser()
{
return parser;
}
public void setParser(String fieldName)
{
this.parser = new QueryParser(fieldName, this.analyzer);
}
public void getDefaultByStatus(int status)
{
this.analyzer = new StandardAnalyzer();
this.parser = new QueryParser("status", this.analyzer);
}
protected void doClear(File dir,boolean deleteSubDir)
{
for (File file: dir.listFiles())
{
if (file.isDirectory() && deleteSubDir)
{
doClear(file,deleteSubDir);
}
file.delete();
}
} //End of doClear();
protected void doClose() throws IOException
{
this.searcher.getIndexReader().close();
}
public boolean add(Object Obj) throws Exception
{
User currentUser = (User)Obj;
boolean isAdded = false;
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));
// addRelatedFields(luceneDoc,city.getStateCode());
IndexWriter writer = getIndexWriter(false);
writer.addDocument(luceneDoc);
closeIndexWriter();
isAdded = true;
System.out.println(isAdded);
return isAdded;
} // End of add
public boolean update(Object Obj) throws Exception
{
boolean isUpdated = false;
User currentUser = (User) Obj;
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
// luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));
// addRelatedFields(luceneDoc,city.getStateCode());
IndexWriter writer = getIndexWriter(false);
writer.updateDocument(new Term("login", currentUser.getLogin()),luceneDoc);
closeIndexWriter();
isUpdated = true;
return isUpdated;
} // End of update
public boolean delete(Object Obj) throws Exception
{
boolean isDeleted = false;
User currentUser = (User) Obj;
Term deleteTerm = new Term("login", currentUser.getLogin());
IndexWriter writer = getIndexWriter(false);
writer.deleteDocuments(deleteTerm); // Or use Query
writer.forceMergeDeletes();
closeIndexWriter();
isDeleted = true;
return isDeleted;
} // End of delete
@Override
public Object search(String[] fieldNames, String[] fieldValues, int returnType, int limit) throws Exception
{
Object obj = null;
org.apache.lucene.search.ScoreDoc[] hits = searchSEO(fieldNames,fieldValues, limit);
int hitSize = (null == hits) ? 0 : hits.length;
System.out.println("total:" + hitSize);
doClose();
return obj;
} // End of search
public void addThreadUser()
{
User user = new User();
addUserPojo(user);
add(user);
}
public void updateThreadUser()
{
User user = new User();
addUserPojo(user);
update(user);
}
public void deleteThreadUser()
{
User user = new User();
addUserPojo(user);
delete(user);
}
private void addUserPojo(User user)
{
user.setOid(3);
user.setLogin("senthil");
user.setFirstName("Semthil");
user.setLastName("Semthil");
user.setStatus(1);
user.setCity("Combiatore");
user.setEmailId("semthil@xyz.com");
}
public void searchUser()
{
searchUser(new String[] {"login"}, new String[] {"Se"}, null);
}
public static void main(String[] args)
{
SearchHelper test = new SearchHelper();
test.searchUser();
}
}
您正在使用StringField
索引數據,但此字段將繞過分析器鏈並始終將您的術語逐字索引為一個標記,而不管您的分析器如何。 如果要分析數據並且StandardAnalyzer
已經進行了較低的套管,則應使用TextField
。 除此之外,該WildcardQuery
不分析其任期,因此,如果您搜索班加羅爾,它不會從索引現已小寫班加羅爾相匹配。 您必須自己小寫搜索項(或使用分析器)。
使用LowerCaseFilter作為您引用的帖子建議:
TokenStream stream = new StandardFilter(Version.LUCENE_CURRENT, tokenizer);
stream = new LowerCaseFilter(Version.LUCENE_CURRENT, stream);
這篇文章中有一個更完整的例子。
您可以使用custome比較類
class CaseIgonreCompare extends FieldComparator<String>{
private String field;
private String bottom;
private String topValue;
private BinaryDocValues cache;
private String[] values;
public CaseIgonreCompare(String field, int numHits) {
this.field = field;
this.values = new String[numHits];
}
@Override
public int compare(int arg0, int arg1) {
return compareValues(values[arg0], values[arg1]);
}
@Override
public int compareBottom(int arg0) throws IOException {
return compareValues(bottom, cache.get(arg0).utf8ToString());
}
@Override
public int compareTop(int arg0) throws IOException {
return compareValues(topValue, cache.get(arg0).utf8ToString());
}
public int compareValues(String first, String second) {
int val = first.length() - second.length();
return val == 0 ? first.compareToIgnoreCase(second) : val;
};
@Override
public void copy(int arg0, int arg1) throws IOException {
values[arg0] = cache.get(arg1).utf8ToString();
}
@Override
public void setBottom(int arg0) {
this.bottom = values[arg0];
}
@Override
public FieldComparator<String> setNextReader(AtomicReaderContext arg0)
throws IOException {
this.cache = FieldCache.DEFAULT.getTerms(arg0.reader(),
field , true);
return this;
}
@Override
public void setTopValue(String arg0) {
this.topValue = arg0;
}
@Override
public String value(int arg0) {
return values[arg0];
}
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.