简体   繁体   中英

How to remove word in string based on array in Javascript when word's character length in string is fewer than in array?

I want to remove some words from a string based on an array. However, the words in the string may have fewer characters than the corresponding words in the array. Is it possible to match them using a regex and then replace them with an empty string? If not, what are the alternatives?

I tried using a regex to match the words, but I couldn't get it to work. I don't know how to make the regex match a minimum of 3 characters from each word in the array.

array = ['reading', 'books'];

string = 'If you want to read the book, just read it.';

desiredOutput = 'If you want to  the , just  it.';


// Desired match

'reading' -> match for 'rea', 'read', 'readi', 'readin', 'reading'

'books' -> match for 'boo', 'book', 'books'

One option is to match 3 or more word characters starting at a word boundary, then use a replacer function to return the empty string if any of the words startsWith the word in question:

 const array = ['reading', 'books'];
 const string = 'If you want to read the book, just read it.';

 // Match every run of 3 or more word characters that starts at a word boundary
 // (single backslashes: \b and \w are regex escapes, not literal backslashes).
 // The replacer drops the matched word when it is a prefix of any entry in
 // `array`, and otherwise puts the word back unchanged.
 const output = string.replace(
   /\b\w{3,}/g,
   word => array.some(item => item.startsWith(word)) ? '' : word
 );

 console.log(output);

The answer from CertainPerformance is better - easier to implement and to maintain but it's worth noting that - you can also generate a regex from the array.

The idea is simple enough - if you want to match r, re, rea, read, readi, readin, reading, the regex for that is reading|readin|readi|read|rea|re|r. The reason you want the longest variation first is that otherwise the regex engine will stop at the first match it finds:

 // Shortest alternative first: the engine tries the alternatives left to right
 // and stops at the first one that matches, so only the leading "r" is removed.
 let regex = /r|re|rea|read/g;

 console.log(
   "read".replace(regex, "") // prints "ead", not ""
 );

So you can take a word and break it up following this pattern to generate a regex from it:

 /**
  * Lists every leading slice (prefix) of `word`, from the full word down to
  * its first character.
  *
  * @param {string} word - The word to slice.
  * @returns {string[]} Prefixes ordered longest first; empty for an empty word.
  */
 function allSubstrings(word) {
   return Array.from({ length: word.length }, (_, dropped) =>
     word.slice(0, word.length - dropped)
   );
 }

 console.log(allSubstrings("reading"));

With that you can simply generate the regex you need.

 /**
  * Lists every leading slice (prefix) of `word`, longest first.
  *
  * @param {string} word - The word to slice.
  * @returns {string[]} The prefixes, from the whole word down to one character.
  */
 function allSubstrings(word) {
   const prefixes = [];
   let end = word.length;
   while (end > 0) {
     prefixes.push(word.slice(0, end));
     end -= 1;
   }
   return prefixes;
 }

 /**
  * Turns a word into a regex alternation of all its prefixes, longest first.
  *
  * @param {string} word - The word to convert.
  * @returns {string} Alternation source such as "read|rea|re|r".
  */
 function toPattern(word) {
   return allSubstrings(word).join("|");
 }

 console.log(toPattern("reading"));

The final thing is to take an array and convert it to a regex. Which requires treating each word and then combining each individual regex into one that matches any of the words:

 const array = ['reading', 'books'];
 const string = 'If you want to read the book, just read it.';

 // Generate the pattern.
 const pattern = array
   .map(toPattern) // first, build the alternation for each word
   .join("|"); // then join the patterns for all words

 // Convert the pattern to a regex.
 const regex = new RegExp(pattern, "g");

 const result = string.replace(regex, "");

 // desiredOutput: 'If you want to  the , just  it.'
 console.log(result);

 /**
  * Lists every leading slice (prefix) of `word`, longest first.
  *
  * @param {string} word - The word to slice.
  * @returns {string[]} The prefixes, from the whole word down to one character.
  */
 function allSubstrings(word) {
   let substrings = [];
   for (let i = word.length; i > 0; i--) {
     let sub = word.slice(0, i);
     substrings.push(sub);
   }
   return substrings;
 }

 /**
  * Turns a word into a regex alternation of all its prefixes, longest first.
  *
  * @param {string} word - The word to convert.
  * @returns {string} Alternation source such as "read|rea|re|r".
  */
 function toPattern(word) {
   let substrings = allSubstrings(word);
   let pattern = substrings.join("|");
   return pattern;
 }

So, this is how you can generate a regular expression from that array. In this case, that works, but it's not guaranteed to, because there is a danger it could match something you don't want. For example, the alternative r will match the letter "r" anywhere in the text; it doesn't need to be part of a word that matches one from the array.

 const array = ['reading'];
 // Note the "r" in "brown" and in "over": the bare alternative "r" hits both.
 const string = 'The quick brown fox jumps over the lazy dog';

 const pattern = array
   .map(word => allSubstrings(word).join("|"))
   .join("|");

 const regex = new RegExp(pattern, "g");
 const result = string.replace(regex, "");

 // Demonstrates the over-matching problem: prints
 // 'The quick bown fox jumps ove the lazy dog'
 console.log(result);

 /**
  * Lists every leading slice (prefix) of `word`, longest first.
  *
  * @param {string} word - The word to slice.
  * @returns {string[]} The prefixes, from the whole word down to one character.
  */
 function allSubstrings(word) {
   let substrings = [];
   for (let i = word.length; i > 0; i--) {
     let sub = word.slice(0, i);
     substrings.push(sub);
   }
   return substrings;
 }

Which is when it becomes more complicated, as you want to generate a more complicated pattern for each word. You generally want to match whole words, so you can use the word boundary assertion \b, which means that the pattern for "reading" can now look like this:

\breading\b|\breadin\b|\breadi\b|\bread\b|\brea\b|\bre\b|\br\b
↑↑       ↑↑ ↑↑      ↑↑ ↑↑     ↑↑ ↑↑    ↑↑ ↑↑   ↑↑ ↑↑  ↑↑ ↑↑ ↑↑

In the interest of keeping the output at least somewhat readable, it can instead be put in a group and the whole group made to match a single word:

\b(?:reading|readin|readi|read|rea|re|r)\b
   ↑↑
   ||____ non-capturing group

So, you have to generate this pattern

/**
 * Builds a whole-word pattern for `word`: a non-capturing group holding the
 * alternation of the strings returned by allSubstrings(word), with a \b
 * word-boundary assertion on each side.
 *
 * @param {string} word - The word to convert.
 * @returns {string} Regex source, to be passed to the RegExp constructor.
 */
function toPattern(word) {
  const alternation = allSubstrings(word).join("|");
  // The backslashes are doubled because these are string literals: the
  // resulting string has to contain a single backslash followed by "b".
  return ["\\b(?:", alternation, ")\\b"].join("");
}

Which leads us to this

 const array = ['reading', 'books'];
 const string = 'The quick brown fox jumps over the lazy dog. If you want to read the book, just read it.';

 const pattern = array
   .map(toPattern)
   .join("|");

 const regex = new RegExp(pattern, "g");
 const result = string.replace(regex, "");

 // Only whole words that are a prefix of an array entry are removed, so the
 // "r" inside "brown" and "over" is left alone.
 console.log(result);

 /**
  * Lists every leading slice (prefix) of `word`, longest first.
  *
  * @param {string} word - The word to slice.
  * @returns {string[]} The prefixes, from the whole word down to one character.
  */
 function allSubstrings(word) {
   let substrings = [];
   for (let i = word.length; i > 0; i--) {
     let sub = word.slice(0, i);
     substrings.push(sub);
   }
   return substrings;
 }

 /**
  * Builds a whole-word pattern for `word`: \b, a non-capturing group with all
  * prefixes of the word (longest first), then \b.
  *
  * @param {string} word - The word to convert.
  * @returns {string} Regex source, to be passed to the RegExp constructor.
  */
 function toPattern(word) {
   let substrings = allSubstrings(word);
   // Two backslashes in the string literal yield ONE backslash in the string,
   // which the RegExp constructor then reads as the \b word-boundary assertion.
   let pattern = "\\b(?:" + substrings.join("|") + ")\\b";
   return pattern;
 }

This will suffice to solve your task. So it's possible to generate a regex. The final one looks like this:

/\b(?:reading|readin|readi|read|rea|re|r)\b|\b(?:books|book|boo|bo|b)\b/g

But most of the effort of generating it is spent trying to produce something that works. It's not necessarily a complex solution but, as mentioned, the one suggested by CertainPerformance is better because it's simpler, which means less chance of it failing, and it would be easier to maintain in the future.

I don't know of a straight way to do it, but you can create your own regexp pattern, like so:

/**
 * Creates a regex source string for one word of the array.
 *
 * The first `min` characters of the word are mandatory. Each remaining
 * character is optional, but only when every character before it was matched,
 * so the fragment matches real prefixes of the word and nothing else, e.g.
 * ("books", 3) -> "boo(?:k(?:s)?)?" matches "boo", "book" and "books".
 * A word shorter than `min` must be matched in full.
 *
 * @param {string} str - The word to build the pattern for.
 * @param {number} min - The minimum number of leading letters that must match.
 * @returns {string} Regex source for the word (no anchors, no flags).
 */
function getRegexWithMinChars(str, min) {
    // Escape regex metacharacters so every character of the word is literal.
    const chars = Array.from(str, function (ch) {
        return ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    });

    // Same cut-off as an `index < min` test: a missing, NaN or negative `min`
    // makes every character optional.
    const requiredCount = min > 0 ? Math.ceil(min) : 0;
    const required = chars.slice(0, requiredCount).join("");

    // Fold from the last character backwards so the optional tail nests:
    // "s" -> "(?:s)?" -> "(?:k(?:s)?)?" and so on.
    const optional = chars.slice(requiredCount).reduceRight(function (tail, ch) {
        return "(?:" + ch + tail + ")?";
    }, "");

    return required + optional;
}

/**
 * Returns a RegExp object combining the patterns of all the words in the array.
 *
 * Each word's pattern comes from getRegexWithMinChars and is placed in its own
 * capturing group; the groups are joined as alternatives, so one match covers
 * exactly one word and (for min >= 1) the regexp never matches the empty
 * string.
 *
 * @param {string[]} strArr - The words to search for.
 * @param {number} min - The minimum number of leading letters required per word.
 * @returns {RegExp} A regexp with the "g" and "m" flags set.
 */
function getStrArrayRegExWithMinChars(strArr, min) {
    const alternatives = strArr.map(function (word) {
        return "(" + getRegexWithMinChars(word, min) + ")";
    });
    // "(?!)" can never match: with no words there is nothing to find.
    const source = alternatives.length > 0 ? alternatives.join("|") : "(?!)";
    return new RegExp(source, "gm");
}

// Build the combined regexp; the second argument (3) is the minimum number of
// leading letters of each word that must be present.
// NOTE(review): `searchArr` is not defined in this snippet - presumably the
// array of words to remove; define it before running.
var regexp = getStrArrayRegExWithMinChars(searchArr, 3);

// With the given regexp I was able to use string replace to 
// find and replace all the words in the string
// NOTE(review): `str` is not defined in this snippet either. replace() returns
// a new string and the result is discarded here - assign it to use it.
str.replace(regexp, "");

// The same can be done with one ES6 function.
/**
 * Builds a RegExp that matches any word of `searchArr`, or any prefix of it
 * that is at least `min` characters long.
 *
 * Each word gets its own capturing group. Inside the group the first `min`
 * characters are mandatory and the rest are nested optional groups, e.g.
 * "books" with min 3 becomes "(boo(?:k(?:s)?)?)". The groups are joined as
 * alternatives.
 *
 * @param {string[]} searchArr - The words to search for.
 * @param {number} min - The minimum number of leading letters required per word.
 * @returns {RegExp} A regexp with the "g" and "m" flags set, ready to be
 *   passed to String.prototype.replace.
 */
const getStrArrayRegExWithMinChars = (searchArr, min) => {
    // Escape regex metacharacters so every character of a word is literal.
    const escapeChar = (ch) => ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Same cut-off as an `index < min` test: a missing, NaN or negative `min`
    // makes every character optional.
    const requiredCount = min > 0 ? Math.ceil(min) : 0;

    const wordPatterns = searchArr.map((word) => {
        const letters = Array.from(word, (ch) => escapeChar(ch));
        const required = letters.slice(0, requiredCount).join("");
        // Fold from the last letter backwards so the optional tail nests and a
        // letter can only match when all letters before it matched.
        const optional = letters
            .slice(requiredCount)
            .reduceRight((tail, letter) => "(?:" + letter + tail + ")?", "");
        return "(" + required + optional + ")";
    });

    // "(?!)" can never match: with no words there is nothing to find.
    const source = wordPatterns.length > 0 ? wordPatterns.join("|") : "(?!)";
    // Return a real RegExp: replace() treats a plain string as literal text.
    return new RegExp(source, "gm");
};

// Build the pattern for the words; the second argument (3) is the minimum
// number of leading letters of each word that must be present.
// NOTE(review): `searchArr` is not defined in this snippet - presumably the
// array of words to remove; define it before running.
var regexp = getStrArrayRegExWithMinChars(searchArr, 3);

// With the given regexp I was able to use string replace to 
// find and replace all the words in the string
// NOTE(review): `str` is not defined in this snippet either. replace() only
// treats its first argument as a pattern when it is a RegExp object, and it
// returns a new string - the result is discarded here.
str.replace(regexp, "");

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM