[英]Lucene.net: Querying and using a filter to limit results
像往常一樣,我轉向Stackoverflow用戶群的大量腦力,以幫助解決我正在與之斗爭的Lucene.NET問題。 首先,當談到Lucene和Lucene.NET時,我是一個完整的菜鳥,通過在線使用分散的教程和代碼片段,我為我的場景拼湊了以下解決方案。
情景
我有以下結構的索引:
---------------------------------------------------------
| id | date | security | text |
---------------------------------------------------------
| 1 | 2011-01-01 | -1-12-4- | some analyzed text here |
---------------------------------------------------------
| 2 | 2011-01-01 | -11-3- | some analyzed text here |
---------------------------------------------------------
| 3 | 2011-01-01 | -1- | some analyzed text here |
---------------------------------------------------------
我需要能夠查詢文本字段,但將結果限制為具有特定roleId的用戶。
為實現這一目標我想出的是(經過多次,多次訪問谷歌之后)使用“安全字段”和Lucene過濾器來限制結果集,如下所述:
/// <summary>
/// Filter that admits only documents whose stored "security" field contains
/// a given role id wrapped in dashes (for example "-1-" inside "-1-12-4-").
/// </summary>
/// <remarks>
/// This implementation loads the stored document for every doc id in the
/// reader, so its cost grows with the size of the index.
/// </remarks>
class SecurityFilter : Lucene.Net.Search.Filter
{
    // Dash-delimited token searched for inside the stored security string.
    private readonly string roleToken;

    /// <summary>Creates a filter for role id "1" (the original hard-coded behaviour).</summary>
    public SecurityFilter() : this("1")
    {
    }

    /// <summary>Creates a filter for the given role id.</summary>
    /// <param name="roleId">Role id without the surrounding dashes.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="roleId"/> is null.</exception>
    public SecurityFilter(string roleId)
    {
        if (roleId == null)
        {
            throw new System.ArgumentNullException("roleId");
        }
        roleToken = "-" + roleId + "-";
    }

    /// <summary>
    /// Builds a bit set with one bit per doc id; a bit is set when that
    /// document's "security" field contains the role token.
    /// </summary>
    /// <param name="indexReader">Reader over the index being searched.</param>
    /// <returns>A <see cref="System.Collections.BitArray"/> of length <c>MaxDoc()</c>.</returns>
    public override System.Collections.BitArray Bits(Lucene.Net.Index.IndexReader indexReader)
    {
        int maxDoc = indexReader.MaxDoc();
        System.Collections.BitArray bitarray = new System.Collections.BitArray(maxDoc);
        for (int i = 0; i < maxDoc; i++)
        {
            // Doc ids of deleted documents are still below MaxDoc(), but
            // Document(i) must not be called for them.
            if (indexReader.IsDeleted(i))
            {
                continue;
            }
            // Get returns null when the document has no stored "security" field.
            string security = indexReader.Document(i).Get("security");
            if (security != null && security.Contains(roleToken))
            {
                bitarray.Set(i, true);
            }
        }
        return bitarray;
    }
}
... 然后 ...
// Analyzer used to parse the query text; it is built with the same Lucene
// compatibility version as the parser below.
var analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
// Sort on the "date" field; the second argument requests reverse order.
var sort = new Lucene.Net.Search.Sort(new Lucene.Net.Search.SortField("date", true));
// Open the on-disk index in read-only mode.
var searcher = new Lucene.Net.Search.IndexSearcher(
    Lucene.Net.Store.FSDirectory.Open(indexDirectory),
    true);
// Free-text queries are parsed against the "text" field by default.
var parser = new Lucene.Net.QueryParsers.QueryParser(
    Lucene.Net.Util.Version.LUCENE_29,
    "text",
    analyzer);
Lucene.Net.Search.Query query = parser.Parse("some search phrase");
// Restrict the hits with the custom security filter, then sort them.
var filter = new SecurityFilter();
Lucene.Net.Search.Hits hits = searcher.Search(query, filter, sort);
這按預期工作,只返回id為1和3的文檔。問題是在大型索引上,此過程變得非常慢。
最后,我的問題......有沒有人有關於如何加快速度的提示,或者有一個比我在這里提出的更有效的替代解決方案?
如果您將安全字段編入索引(將其安全字符串拆分為1 12 4 ...)
你可以創建這樣的過濾器
// The term's field name must match the indexed field name exactly; a trailing
// space ("security ") would name a different field and match nothing.
Filter filter = new QueryFilter(new TermQuery(new Term("security", "1")));
或者
形成一個像 some text +security:1 這樣的查詢。
我用一個簡單的例子改變了我的答案,解釋了我在之前的答案中的意思。
這段代碼是我匆忙寫出來的,沒有遵循最佳實踐,但應該足以讓你明白思路。
請注意,安全字段需要進行標記化,以便其中的每個ID都是單獨的標記,例如使用WhitespaceAnalyzer 。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Search;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Analysis.Standard;
using System.IO;
namespace ConsoleApplication1
{
class Program
{
/// <summary>
/// Static lookup table from role name to the filter built for that role.
/// No locking is performed around the underlying dictionary.
/// </summary>
public class RoleFilterCache
{
    /// <summary>Backing store, keyed by role name.</summary>
    public static Dictionary<string, Filter> Cache = new Dictionary<string, Filter>();

    /// <summary>Looks up the filter stored for a role.</summary>
    /// <param name="role">Role name; may be null.</param>
    /// <returns>The stored filter, or null when the role is null or has no entry.</returns>
    public static Filter Get(string role)
    {
        // Dictionary rejects null keys with ArgumentNullException. Put ignores
        // a null role, so a null lookup is treated as a plain cache miss.
        if (role == null)
        {
            return null;
        }
        Filter cached;
        return Cache.TryGetValue(role, out cached) ? cached : null;
    }

    /// <summary>Stores (or replaces) the filter for a role.</summary>
    /// <param name="role">Role name; a null role is ignored.</param>
    /// <param name="filter">Filter to associate with the role.</param>
    public static void Put(string role, Filter filter)
    {
        if (role != null)
        {
            Cache[role] = filter;
        }
    }
}
/// <summary>
/// A user together with the names of the roles the user holds.
/// </summary>
public class User
{
    /// <summary>Display name of the user.</summary>
    public string Username;

    /// <summary>
    /// Role names held by the user. Starts as an empty list so that a freshly
    /// constructed user can be enumerated without a null check.
    /// </summary>
    public List<string> Roles = new List<string>();
}
/// <summary>
/// Builds a filter that combines the per-role filters of every role the
/// user holds, each added as a SHOULD clause.
/// </summary>
/// <param name="u">User whose roles determine the filter.</param>
/// <returns>
/// A <c>BooleanFilter</c> with one clause per role; it has no clauses when
/// the user has no roles (or <c>Roles</c> is null).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="u"/> is null.</exception>
public static Filter GetFilterForUser(User u)
{
    if (u == null)
    {
        throw new ArgumentNullException("u");
    }
    BooleanFilter userFilter = new BooleanFilter();
    // A user whose role list was never assigned is treated like a user with
    // an empty role list instead of failing with a NullReferenceException.
    if (u.Roles == null)
    {
        return userFilter;
    }
    foreach (string rolename in u.Roles)
    {
        // Reuse the (possibly cached) per-role filter and OR it in.
        userFilter.Add(
            new BooleanFilterClause(GetFilterForRole(rolename), BooleanClause.Occur.SHOULD)
        );
    }
    return userFilter;
}
/// <summary>
/// Returns the filter for a single role, building and caching it on first use.
/// </summary>
/// <param name="role">Role name, matched as a term in the "security" field.</param>
/// <returns>The cached filter for the role, or a newly built one.</returns>
public static Filter GetFilterForRole(string role)
{
    Filter existing = RoleFilterCache.Get(role);
    if (existing != null)
    {
        return existing;
    }

    // The filter is derived from a term query on the index rather than by
    // iterating stored document content, which is much faster.
    TermQuery roleQuery = new TermQuery(new Term("security", role));
    QueryWrapperFilter indexBackedFilter = new QueryWrapperFilter(roleQuery);

    // The caching wrapper makes it cache the BitSet per segment reader.
    Filter created = new CachingWrapperFilter(indexBackedFilter);

    // Remember it for subsequent calls with the same role.
    RoleFilterCache.Put(role, created);
    return created;
}
/// <summary>
/// Demo entry point: builds a four-document index under C:\example\,
/// then runs the same phrase query for three users and prints what each
/// user's security filter lets through.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    IndexWriter iw = new IndexWriter(new FileInfo("C:\\example\\"), new StandardAnalyzer(), true);
    try
    {
        // One Document and its two Field instances are reused; only the
        // field values change between AddDocument calls.
        Document d = new Document();
        Field aField = new Field("content", "", Field.Store.YES, Field.Index.ANALYZED);
        Field securityField = new Field("security", "", Field.Store.NO, Field.Index.ANALYZED);
        d.Add(aField);
        d.Add(securityField);

        aField.SetValue("Only one can see.");
        securityField.SetValue("1");
        iw.AddDocument(d);

        aField.SetValue("One and two can see.");
        securityField.SetValue("1 2");
        iw.AddDocument(d);

        aField.SetValue("One and two can see.");
        securityField.SetValue("1 2");
        iw.AddDocument(d);

        aField.SetValue("Only two can see.");
        securityField.SetValue("2");
        iw.AddDocument(d);
    }
    finally
    {
        // Close the writer even if indexing fails part-way through.
        iw.Close();
    }

    User userone = new User()
    {
        Username = "User one",
        Roles = new List<string>()
    };
    userone.Roles.Add("1");

    User usertwo = new User()
    {
        Username = "User two",
        Roles = new List<string>()
    };
    usertwo.Roles.Add("2");

    User userthree = new User()
    {
        Username = "User three",
        Roles = new List<string>()
    };
    userthree.Roles.Add("1");
    userthree.Roles.Add("2");

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("content", "can"));
    phraseQuery.Add(new Term("content", "see"));

    IndexSearcher searcher = new IndexSearcher("C:\\example\\", true);
    try
    {
        PrintResults(searcher, phraseQuery, GetFilterForUser(userone), "User One Results:");
        PrintResults(searcher, phraseQuery, GetFilterForUser(usertwo), "User two Results:");
        PrintResults(searcher, phraseQuery, GetFilterForUser(userthree), "User three Results (should see everything):");
    }
    finally
    {
        // The searcher was previously never closed.
        searcher.Close();
    }

    Console.ReadKey();
}

/// <summary>
/// Runs the query through the filter (top 25 hits), then prints the heading,
/// the stored "content" of each hit, and a blank-line separator.
/// </summary>
private static void PrintResults(IndexSearcher searcher, Query query, Filter securityFilter, string heading)
{
    TopDocs results = searcher.Search(query, securityFilter, 25);
    Console.WriteLine(heading);
    foreach (var aResult in results.ScoreDocs)
    {
        Console.WriteLine(
            searcher.Doc(aResult.doc).
                Get("content")
        );
    }
    Console.WriteLine("\n\n");
}
}
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.