简体   繁体   中英

Is there a way to use regex in JavaScript to implement efficient and accurate search functionality?

I have been working on a project that requires its users to search for products, something like the search functionality on Walmart or Amazon. But it seems that any time I feel like I have a working solution, I face another problem. Here is my current code snippet; I will explain what the code does below.

// Sample suggestion phrases used by the search demo. The order is significant:
// filtering this list keeps matches in the order they are written here.
// 'oargens' (a scrambled "oranges") is part of the sample data as given.
const keywords = [
  "oranges",
  "red bull",
  "red bull energy drink",
  "carnation breakfast essentials",
  "carnation instant breakfast",
  "organic oranges",
  "oargens",
  "nesquik powder",
  "welch's orange juice",
  "mandarin oranges",
  "bananas",
  "nesquik chocolate powder",
  "jimmy dean sausage",
  "organic bananas",
  "nesquik",
  "nesquik no sugar",
  "welch's white grape",
  "great value",
  "great value apple juice",
  "lemon",
  "lemon fruit",
  "avocados",
  "avocados, each",
  "apple juice",
];

/**
 * Search-as-you-type helper that filters a keyword list against a query.
 *
 * Matching heuristic (see `matchKeywords`): a keyword is kept when it starts
 * with the same character as the query, its first three characters loosely
 * agree with the query's first three, and the keyword characters lying under
 * the query's last three characters are exactly those characters.
 */
class SearchApi {
  /**
   * @param {string[]} keywords - Candidate phrases to filter.
   * @param {string} query - Raw text typed by the user.
   */
  constructor(keywords, query) {
    this.keywords = keywords;
    this.query = query;
  }

  /**
   * Checks that `keyword` contains every character of `query`, in any order
   * and regardless of how often a character repeats in `query`.
   *
   * One lookahead is emitted per query character. Each character is escaped
   * first, so regex metacharacters such as `(`, `*` or `.` are matched
   * literally instead of throwing a SyntaxError or acting as wildcards.
   *
   * @param {string} keyword - Text to inspect.
   * @param {string} query - Characters that must all occur in `keyword`.
   * @returns {string[] | null} The result of a global `String.prototype.match`:
   *   an array (truthy) when all characters are present, otherwise `null`.
   */
  findWithRegex(keyword, query) {
    const pattern = Array.from(query, (char) => {
      const literal = char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      return `(?=.*${literal})`;
    }).join('');

    // A fresh RegExp is built per call, so the stateful `lastIndex` of the
    // global flag never leaks between invocations.
    return keyword.match(new RegExp(pattern, 'g'));
  }

  /**
   * Returns the keywords that match the stored query, in list order.
   *
   * The query is trimmed and lower-cased once; keywords are lower-cased for
   * comparison only, and the original strings are returned.
   *
   * @returns {string[]} Matching keywords; empty when the query is blank.
   */
  matchKeywords() {
    const query = this.query.trim().toLowerCase();
    if (query.length === 0) {
      return [];
    }

    const head = query.slice(0, 3);
    // Window occupied by the query's last (up to) three characters.
    const tailStart = Math.max(query.length - 3, 0);
    const tail = query.slice(tailStart);

    return this.keywords.filter((keyword) => {
      const candidate = keyword.toLowerCase();

      // Typing "o" must only surface keywords that begin with "o".
      if (candidate.charAt(0) !== query.charAt(0)) {
        return false;
      }

      // Loose check on the opening characters (order-insensitive).
      if (!this.findWithRegex(candidate.slice(0, 3), head)) {
        return false;
      }

      // Exact, ordered comparison of the tail window. Treating the tail as an
      // unordered character set let "nan" match "ang", which is why the query
      // "organic banan" used to return "organic oranges" as well.
      return candidate.slice(tailStart, query.length) === tail;
    });
  }
}

// Demo run: search the sample keyword list for the partial query 'organic banan'.
const searchApi = new SearchApi(keywords, 'organic banan');
// NOTE: the array returned by matchKeywords() is neither stored nor logged here.
searchApi.matchKeywords();

Code Explained

What I am basically doing here is when a query is made, I compare the first and last three characters of the query and keyword and also check if the initials in the query and keyword are the same because if someone types the letter " o " I want to show only keywords that begin with that letter.

It mostly works, but while testing, when I type "organic banan" as the query I get ["organic oranges", "organic bananas"], whereas the result should be ["organic bananas"].

This is because the regex function finds the characters "a" and "n" in the three characters of "organic oranges" ("ang") that are compared against the last three characters of the query ("nan"). Any further assistance on how to do this efficiently would be helpful to me.

Search-as-you-type or auto-complete features are usually implemented with specialized data structures and algorithms but not regex (unless your search feature is all about regex searching..)

You may want to use a binary search to search strings in a sorted array, as shown below (I've selected the function below for demonstration purposes only and do not recommend it; see the linked source). You will probably find many packages that fit your environment and needs, e.g. fast-string-search, which uses N-API and boyer-moore-magiclen to make things fast.

In terms of data structures, prefix trees (TRIE) are often suggested and used to implement fast autocomplete features. Here is a simple implementation that shows the basic concept of a TRIE.

 // Demo data. The list is in ascending order, which the binary search below
 // relies on.
 const movies = [
   "ACADEMY DINOSAUR",
   "ACE GOLDFINGER",
   "ADAPTATION HOLES",
   "AFFAIR PREJUDICE",
   "BENEATH RUSH",
   "BERETS AGENT",
   "BETRAYED REAR",
   "BEVERLY OUTLAW",
   "BIKINI BORROWERS",
   "YENTL IDAHO",
   "YOUNG LANGUAGE",
   "YOUTH KICK",
   "ZHIVAGO CORE",
   "ZOOLANDER FICTION",
   "ZORRO ARK",
 ];

 /**
  * Prefix search: returns every entry of `haystack` that starts with `needle`.
  *
  * Two binary searches locate the first entry whose prefix is not smaller than
  * the needle and the first entry whose prefix is greater than it; everything
  * in between is the result. This replaces the earlier "find one hit, then
  * scan outwards" approach, which dropped the last entry when the matching run
  * reached the end of the array and never inspected index 0.
  *
  * Precondition: `haystack` must be sorted ascending under the same comparison
  * that is used here (plain `<` on strings, applied to lower-cased prefixes
  * when the search is case-insensitive).
  *
  * @param {string} needle - Prefix to look for; an empty needle yields `[]`.
  * @param {string[]} haystack - Sorted list of strings to search.
  * @param {boolean} [case_insensitive] - Treated as `true` when omitted;
  *   otherwise its truthiness decides whether case is ignored.
  * @returns {string[]} Matching entries in their original order (new array).
  */
 function searchBinary(needle, haystack, case_insensitive) {
   if (typeof needle !== "string" || needle === "") return [];
   if (!Array.isArray(haystack) || haystack.length === 0) return [];

   const ignoreCase = case_insensitive === undefined || Boolean(case_insensitive);
   const prefixLength = needle.length;
   const key = ignoreCase ? needle.toLowerCase() : needle;

   // Comparable prefix of one entry: cut to the needle's length, then fold case.
   const prefixOf = (entry) => {
     const prefix = entry.slice(0, prefixLength);
     return ignoreCase ? prefix.toLowerCase() : prefix;
   };

   // Smallest index whose prefix does NOT satisfy `isBefore` (haystack.length
   // when every prefix does).
   const boundary = (isBefore) => {
     let low = 0;
     let high = haystack.length;
     while (low < high) {
       const mid = (low + high) >>> 1;
       if (isBefore(prefixOf(haystack[mid]))) {
         low = mid + 1;
       } else {
         high = mid;
       }
     }
     return low;
   };

   const start = boundary((prefix) => prefix < key);
   const end = boundary((prefix) => prefix <= key);

   return haystack.slice(start, end);
 }

 // Demo calls. The second one is case-sensitive with a lower-case needle, so it
 // finds nothing in the all-upper-case list.
 let testBinary = searchBinary("BIKINI", movies, false);
 console.log(`searchBinary("BIKINI", movies, false) = [${testBinary}]`);
 testBinary = searchBinary("b", movies, false);
 console.log(`searchBinary("b", movies, false) = [${testBinary}]`);
 testBinary = searchBinary("b", movies, true);
 console.log(`searchBinary("b", movies, true) = [${testBinary}]`);

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM