[英]How does regex query work on lucene?


我正在使用Lucene 5.4.1




public IndexWriter generateIndex(String docsPath) throws IOException {

      String indexPath = System.getProperty("java.io.tmpdir") +File.separator+"indexDirectory";
        if (indexPath == null) {
          throw new IOException("System property 'java.io.tmpdir' does not specify a tmp dir");
        File tmpDir = new File(indexPath);
        if (!tmpDir.exists()) {
          boolean created = tmpDir.mkdirs();
          if (!created) {
            throw new IOException("Unable to create tmp dir " + tmpDir);

    boolean create = true;
    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
        } else {

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());

    return getIndexWriter();

static void indexDocs(final IndexWriter writer, Path path) throws IOException {
    if (Files.isDirectory(path)) {
        Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                try {
                    indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
                } catch (IOException ignore) {
                    // don't index files that can't be read.
                return FileVisitResult.CONTINUE;
    } else {
        indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());

static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.NO);

        doc.add(new LongField("modified", lastModified, Field.Store.NO));
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            System.out.println("adding " + file);
        } else {
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);


public IndexReader searchExecutor(String index, String queryString, RegexCapabilities capability) throws Exception {

    String field = "contents";
    String queries = null;
    boolean raw = false;
    int hitsPerPage = Integer.MAX_VALUE;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();

    BufferedReader in = null;
    Query q = new RegexpQuery(new Term("text", queryString));
    q = q.rewrite(reader);

    RegexQuery query = new RegexQuery(new Term("\\s*(FIND|find)"));

     if (capability != null)

    System.out.println("Searching for: " + query.toString(field));
    searcher.search(query, null, 1000);
    doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
    return reader;


public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw,
        boolean interactive)
                throws IOException {

    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    // TermsEnum.totalTermFreq();
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");
    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    for (int i = start; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("path");
        File file = new File(path);
        if (path != null) {
            System.out.println((i + 1) + ". " + path);
            String title = doc.get("title");
            if (title != null) {
                System.out.println("   Title: " + doc.get("title"));
        } else {
            System.out.println((i + 1) + ". " + "No path for this document");




  1. 您正在使用已棄用(deprecated)的 RegexQuery,請改用 RegexpQuery。
  2. 您的 regEx 示例以 \\s* 開頭,但您沒有使用 KeywordTokenizer。大多數其他分詞器(tokenizer)會在空白處拆分文本並將空白刪除。
  3. 您的 regEx 示例並非全部小寫,但 StandardAnalyzer 包含 LowerCaseFilter。請注意:您的 regEx 是直接針對已索引的詞元(token)進行匹配的,而不是針對原始文本。

-> 請閱讀 ESTestRegexpRandom(測試類)中支持的 RegExp 語法,並使用 Luke(https://github.com/DmitryKey/luke)檢查您的索引。


