[英]How to remove word in string based on array in Javascript when word's character length in string is fewer than in array?
我想刪除基於數組的字符串中的某些單詞。 但是字符串中單詞的字符長度小於數組中的字符長度。 是否可以使用正則表達式匹配它,然后將其替換為空字符串? 如果沒有,那有什么替代方案?
我嘗試使用正則表達式來匹配該詞,但我無法實現。 我不知道如何使正則表達式匹配數組中的最少3個字符。
// Words whose prefixes (3 or more characters) should be removed from the string.
const array = ['reading', 'books'];
const string = 'If you want to read the book, just read it.';
const desiredOutput = 'If you want to the , just it.';
// Desired match
// 'reading' -> match for 'rea', 'read', 'readi', 'readin', 'reading'
// 'books' -> match for 'boo', 'book', 'books'
一種做法是匹配從單詞邊界開始的 3 個或更多單詞字符,然後使用替換函數:如果數組中有任何單詞以該匹配到的詞開頭(startsWith),就返回空字符串:
const array = ['reading', 'books'];
const string = 'If you want to read the book, just read it.';

// Match every run of 3+ word characters that starts at a word boundary and
// drop it when some entry of `array` begins with that run; otherwise keep it.
const output = string.replace(
  /\b\w{3,}/g,
  (word) => (array.some((item) => item.startsWith(word)) ? '' : word)
);
console.log(output);
當然,CertainPerformance 的答案更好——易於實現和維護,但值得注意的是:您還可以從數組生成正則表達式。
這個想法很簡單:如果要匹配 r、re、rea、read、readi、readin 或 reading,則正則表達式就是 reading|readin|readi|read|rea|re|r。之所以必須把最長的變體放在最前面,是因為否則正則表達式引擎會在找到的第一個匹配項處停止:
// Alternatives are tried left to right and the engine settles for the first
// one that matches, so the short `r` wins before `read` is ever attempted.
const regex = /r|re|rea|read/g;
console.log(
  "read".replace(regex, "") // only the leading "r" is removed
);
因此,您可以選擇一個單詞並以這種模式將其分解以從中生成一個正則表達式
/**
 * Lists every prefix of `word`, ordered from the whole word down to its
 * first character.
 *
 * @param {string} word - The word to take prefixes of.
 * @returns {string[]} Prefixes, longest first; empty for an empty word.
 */
function allSubstrings(word) {
  return Array.from({ length: word.length }, (_, offset) =>
    word.slice(0, word.length - offset)
  );
}

console.log(allSubstrings("reading"));
這樣,您可以簡單地生成所需的正則表達式。
/**
 * Collects the prefixes of `word`, longest first.
 *
 * @param {string} word - The word to take prefixes of.
 * @returns {string[]} Every prefix, from the full word down to one character.
 */
function allSubstrings(word) {
  const prefixes = [];
  let end = word.length;
  while (end > 0) {
    prefixes.push(word.slice(0, end));
    end -= 1;
  }
  return prefixes;
}

/**
 * Joins all prefixes of `word` into a regex alternation, longest first.
 *
 * @param {string} word - The word to build the alternation for.
 * @returns {string} Regex source such as "reading|readin|...|r".
 */
function toPattern(word) {
  return allSubstrings(word).join("|");
}

console.log(toPattern("reading"));
最后一件事是獲取一個數組並將其轉換為正則表達式。 這需要處理每個單詞 ,然后將每個正則表達式組合成與任何單詞匹配的一個:
/**
 * Lists every prefix of `word`, longest first.
 *
 * @param {string} word - The word to take prefixes of.
 * @returns {string[]} Every prefix, from the full word down to one character.
 */
function allSubstrings(word) {
  return Array.from({ length: word.length }, (_, offset) =>
    word.slice(0, word.length - offset)
  );
}

/**
 * Turns `word` into an alternation of its prefixes, longest first.
 *
 * @param {string} word - The word to build the alternation for.
 * @returns {string} Regex source such as "reading|readin|...|r".
 */
function toPattern(word) {
  return allSubstrings(word).join("|");
}

const array = ['reading', 'books'];
const string = 'If you want to read the book, just read it.';

// Generate one sub-pattern per word, then merge them into a single
// alternation that matches a prefix of any of the words.
const perWordPatterns = array.map((word) => toPattern(word));
const pattern = perWordPatterns.join("|");

// Convert the pattern to a global regex and strip every match.
const regex = new RegExp(pattern, "g");
const result = string.replace(regex, "");

//desiredOutput: 'If you want to the , just it.';
console.log(result);
所以,這就是從數組生成正則表達式的方法。在這個例子中它是可行的,但不能保證一定可行,因為存在匹配到您不想要的內容的危險。例如,r 會匹配任何 r 字符,而不要求它位於某個符合條件的單詞之中。
const array = ['reading'];
// The "r" in "brown" and the "r" in "over" are matched by the pattern below,
// even though neither word has anything to do with "reading".
const string = 'The quick brown fox jumps over the lazy dog';

// No word boundaries are used, so any prefix may match in the middle of a word.
const pattern = array
  .map((word) => allSubstrings(word).join("|"))
  .join("|");

const regex = new RegExp(pattern, "g");
const result = string.replace(regex, "");
console.log(result);

/**
 * Lists every prefix of `word`, longest first.
 *
 * @param {string} word - The word to take prefixes of.
 * @returns {string[]} Every prefix, from the full word down to one character.
 */
function allSubstrings(word) {
  const substrings = [];
  for (let i = word.length; i > 0; i--) {
    substrings.push(word.slice(0, i));
  }
  return substrings;
}
當您想為每個單詞生成更精確的模式時,事情就變得更加複雜。通常,您希望匹配的是整個單詞,因此可以使用單詞邊界斷言 \b,這意味着 “reading” 的模式現在看起來像這樣:
\breading\b|\breadin\b|\breadi\b|\bread\b|\brea\b|\bre\b|\br\b
↑↑       ↑↑ ↑↑      ↑↑ ↑↑     ↑↑ ↑↑    ↑↑ ↑↑   ↑↑ ↑↑  ↑↑ ↑↑ ↑↑
為了使輸出至少具有一定的可讀性,可以將其放在一個組中,並使整個組匹配單個單詞:
\b(?:reading|readin|readi|read|rea|re|r)\b
   ↑↑
   ||____ non-capturing group
因此,您必須生成此模式
/**
 * Builds a regex source that matches any prefix of `word` as a whole word.
 *
 * The prefixes come from `allSubstrings(word)` and are joined, in the order
 * returned, inside a non-capturing group surrounded by word boundaries.
 *
 * @param {string} word - The word to build the pattern for.
 * @returns {string} Regex source such as "\b(?:reading|readin|...|r)\b".
 */
function toPattern(word) {
  // Escape regex metacharacters in each prefix so that a word such as "c++"
  // is matched literally instead of producing an invalid pattern.
  const alternatives = allSubstrings(word).map((prefix) =>
    prefix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
  );
  // Backslashes are doubled because this is a string literal and the regex
  // source needs a literal \b as its content.
  return `\\b(?:${alternatives.join("|")})\\b`;
}
這樣我們就得到:
const array = ['reading', 'books'];
const string = 'The quick brown fox jumps over the lazy dog. If you want to read the book, just read it.';

// One word-bounded group per word, merged into a single alternation.
const pattern = array
  .map((word) => toPattern(word))
  .join("|");

const regex = new RegExp(pattern, "g");
const result = string.replace(regex, "");
console.log(result);

/**
 * Lists every prefix of `word`, longest first.
 *
 * @param {string} word - The word to take prefixes of.
 * @returns {string[]} Every prefix, from the full word down to one character.
 */
function allSubstrings(word) {
  const substrings = [];
  for (let i = word.length; i > 0; i--) {
    substrings.push(word.slice(0, i));
  }
  return substrings;
}

/**
 * Builds a regex source that matches any prefix of `word` as a whole word.
 *
 * @param {string} word - The word to build the pattern for.
 * @returns {string} Regex source such as "\b(?:reading|readin|...|r)\b".
 */
function toPattern(word) {
  // Escape regex metacharacters so every prefix is matched literally.
  const alternatives = allSubstrings(word).map((prefix) =>
    prefix.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
  );
  // A single escaped backslash per boundary: the string literal "\\b" yields
  // the regex source \b (word boundary). Doubling it again would make the
  // regex look for a literal backslash followed by "b".
  return `\\b(?:${alternatives.join("|")})\\b`;
}
這足以解決您的任務。 因此可以生成一個正則表達式。 最后一個看起來像這樣:
/\b(?:reading|readin|readi|read|rea|re|r)\b|\b(?:books|book|boo|bo|b)\b/g
但生成它的大部分工夫都花在設法得到一個真正可用的模式上。它不一定是複雜的解決方案,但是正如前面提到的,CertainPerformance 建議的解決方案更好,因為它更簡單,這意味着它出錯的機會更少,並且將來更容易維護。
我不知道這樣做的直接方法,但是您可以創建自己的regexp模式,如下所示:
/**
 * Creates a regex pattern string for one word of the array.
 *
 * The pattern matches `str` itself or any prefix of it that is at least
 * `min` characters long. The first `min` characters are mandatory; every
 * later character is wrapped, together with everything that follows it, in a
 * nested optional group, so a letter can only match when all the letters
 * before it matched as well. Example:
 *   ("reading", 3) -> rea(?:d(?:i(?:n(?:g)?)?)?)?
 *
 * @param {string} str - The string value (the word).
 * @param {number} min - The minimum number of required letters in each word.
 * @returns {string} Regex source string (not a RegExp object).
 */
function getRegexWithMinChars(str, min) {
  // Escape regex metacharacters so every character is matched literally.
  const chars = [...str].map((ch) => ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
  let pattern = "";
  // Build from the last character backwards so each optional character
  // encloses the (already built) pattern for the characters after it.
  for (let i = chars.length - 1; i >= 0; i--) {
    pattern = i < min ? chars[i] + pattern : `(?:${chars[i]}${pattern})?`;
  }
  return pattern;
}
/**
 * Returns a RegExp object combining the patterns of the words in the array.
 *
 * Each word's pattern (from getRegexWithMinChars) is wrapped in its own
 * optional capture group, and the groups are concatenated in array order.
 * The RegExp is created with the "gm" flags.
 *
 * @param {string[]} strArr - The words to build the regex from.
 * @param {number} min - Minimum number of required letters per word.
 * @returns {RegExp} The combined regular expression.
 */
function getStrArrayRegExWithMinChars(strArr, min) {
  const optionalGroups = Array.from(
    strArr,
    (word) => "(" + getRegexWithMinChars(word, min) + ")?"
  );
  return new RegExp(optionalGroups.join(""), "gm");
}
var regexp = getStrArrayRegExWithMinChars(searchArr, 3);
// With the given regexp, string replace finds and removes all the words in
// the string. replace() returns a new string and leaves `str` untouched, so
// the return value has to be kept.
var result = str.replace(regexp, "");
// The same can be done with one ES6 function.
/**
 * Builds a RegExp that matches, for each word in `searchArr`, the word itself
 * or any prefix of it that is at least `min` characters long.
 *
 * Each word becomes one optional capture group; inside it the first `min`
 * letters are mandatory and every later letter is nested in an optional
 * group with the letters after it, so letters cannot be skipped. The RegExp
 * is created with the "gm" flags.
 *
 * @param {string[]} searchArr - The words to build the regex from.
 * @param {number} min - Minimum number of required letters per word.
 * @returns {RegExp} The combined regular expression.
 */
const getStrArrayRegExWithMinChars = (searchArr, min) => {
  const source = searchArr.reduce((wordsPatt, word) => {
    // Fold from the last letter backwards so each optional letter encloses
    // the pattern already built for the letters that follow it.
    const patt = [...word].reduceRight((tail, letter, index) => {
      // Escape regex metacharacters so the letter is matched literally.
      const literal = letter.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      return index < min ? literal + tail : `(?:${literal}${tail})?`;
    }, "");
    return `${wordsPatt}(${patt})?`;
  }, "");
  // Return a RegExp, not the pattern string: String.prototype.replace treats
  // a string argument as literal text to search for.
  return new RegExp(source, "gm");
};
var regexp = getStrArrayRegExWithMinChars(searchArr, 3);
// With the given regexp, string replace finds and removes all the words in
// the string. replace() returns a new string and leaves `str` untouched, so
// the return value has to be kept.
var result = str.replace(regexp, "");
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.