![](/img/trans.png)
[英]How to create a regular expression to find a substring within a larger string?
[英]Find smallest substring containing a given set of letters in a larger string
假設您有以下字符串:
FJKAUNOJDCUTCRHBYDLXKEODVBWTYPTSHASQQFCPRMLDXIJMYPVOHBDUGSMBLMVUMMZYHULSUIZIMZTICQORLNTOVKVAMQTKHVRIFMNTSLYGHEHFAHWWATLYAPEXTHEPKJUGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNT
LDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFY
FFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQ
XBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGR
AMELUTEPYILBIUOCKKUUBJROQFTXMZRLXBAMHSDTEKRRIKZUFNLGTQAEUINMBPYTWXULQNIIRXHHGQDPENXAJNWXULFBNKBRINUMTRBFWBYVNKNKDFR
我試圖找到包含字母 A、B、C、D、A(即兩個 A 以及 B、C、D 各一個,順序不限)的最小子字符串。
我嘗試了正則表達式方法。
// Lazily matches A…B…C…D…A in exactly that order, collects every (non-overlapping)
// match via the g flag, sorts the matches by ascending length and logs the shortest.
// Note: String.prototype.match returns null when nothing matches, so .sort would throw then.
console.log(str.match(/[A].*?[B].*?[C].*?[D].*?[A]/gm).sort((a, b) => a.length - b.length)[0]);
這有效,但它只能找到 ABCDA 出現的字符串(按該順序)。 這意味着它不會找到字母按如下順序出現的子字符串: BCDAA
我正在嘗試更改我的正則表達式來解決這個問題。 如果不使用 | 並逐一寫出所有不同的情況,我該怎麼做?
你不能。
讓我們考慮一個特殊情況:假設您要查找的字母是 A、A 和 B。 在你的正則表達式中的某個位置肯定會有一個 B。 但是,B 的左右兩部分是相互獨立的,因此不能相互引用。 B 右側的子表達式需要匹配多少個 A,取決於左側部分中已匹配的 A 的數量。 這對於正則表達式是不可能的,所以你必須展開所有不同的字母排列順序,而這可能非常多!
另一個說明該問題的流行示例是匹配左括號和右括號。 不可能編寫正則表達式斷言在給定字符串中,左括號序列后跟相同長度的右括號序列。 這樣做的原因是,為了計算括號,您需要一個堆棧機而不是有限狀態機,但正則表達式僅限於可以使用 FSM 匹配的模式。
也許不像使用正則表達式那么清楚(好吧,對我來說正則表達式從來都不是很清楚:D)你可以使用蠻力(不是那么蠻力)
創建字符串“有效”點的索引(那些帶有你想要的字母的點)並用雙循環迭代它以獲得包含至少 5 個這些點的子字符串,檢查它們是否是有效的解決方案。 也許不是最有效的方法,但易於實施、理解,並且可能易於優化。
// Brute-force search for the shortest substrings of `haystack` that contain
// every letter of `needle` plus a second "A" (i.e. the letters A, A, B, C, D
// in any order). Results are written to the page, or to the console when no
// `document` exists (e.g. under Node).
var haystack = "";
var needle = "ABCD";
var size = haystack.length;
var candidate_substring = "";
var minimal_length = size;
var solutions = [];
var points = [];

// Collect the indexes of all "useful" positions: those holding a wanted letter.
for (let i = 0; i < size; i++) {
  if (needle.indexOf(haystack[i]) > -1) points.push(i);
}

// A solution needs at least five useful positions (A, A, B, C, D), so a start
// point with fewer than four useful positions after it cannot begin one.
var limit_i = points.length - 4;
var limit_k = points.length;
for (let i = 0; i < limit_i; i++) {
  for (let k = i; k < limit_k; k++) {
    const span = points[k] - points[i] + 1;
    // points[] is ascending, so every later k only yields a longer span.
    if (span > minimal_length) break;
    candidate_substring = haystack.substr(points[i], span);
    if (is_valid(candidate_substring)) {
      solutions.push(candidate_substring);
      if (candidate_substring.length < minimal_length) minimal_length = candidate_substring.length;
    }
  }
}

emit_html('<p>Solution length:' + minimal_length + '<p>');
for (let i = 0; i < solutions.length; i++) {
  // Earlier, longer finds are still in the list; only print the shortest ones.
  if (solutions[i].length <= minimal_length) emit_html('<p>Solution:' + solutions[i] + '<p>');
}

/**
 * Writes an HTML fragment to the page, falling back to the console when the
 * script runs outside a browser.
 * @param {string} html - fragment to output
 */
function emit_html(html) {
  if (typeof document !== 'undefined') {
    document.write(html);
  } else {
    console.log(html);
  }
}

/**
 * Checks whether a candidate substring is a valid solution.
 * @param {string} candidate_substring - substring of the haystack to check
 * @returns {boolean} true when the candidate contains every letter of the
 *   global `needle` and at least two "A" characters
 */
function is_valid(candidate_substring) {
  // verify we've got all characters (iterate over the needle, not the candidate)
  for (let j = 0; j < needle.length; j++) {
    if (candidate_substring.indexOf(needle.charAt(j)) < 0) return false;
  }
  // ...and verify we have two "A": first and last occurrence must differ
  if (candidate_substring.indexOf("A") === candidate_substring.lastIndexOf("A")) return false;
  return true;
}
該算法不使用正則表達式,但也找到了兩種解決方案。
var haystack = 'FJKAUNOJDCUTCRHBYDLXKEODVBWTYPTSHASQQFCPRMLDXIJMYPVOHBDUGSMBLMVUMMZYHULSUIZIMZTICQORLNTOVKVAMQTKHVRIFMNTSLYGHEHFAHWWATLYAPEXTHEPKJUGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNTLDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFYFFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQXBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGRAMELUTEPYILBIUOCKKUUBJROQFTXMZRLXBAMHSDTEKRRIKZUFNLGTQAEUINMBPYTWXULQNIIRXHHGQDPENXAJNWXULFBNKBRINUMTRBFWBYVNKNKDFR';
var needle = 'ABCDA'; // the order of letters doesn't matter

/**
 * Finds every shortest substring of `haystack` that contains all characters
 * of `needle` (respecting how often each character occurs in `needle`, in
 * any order).
 *
 * For each start position holding a wanted letter, the shortest window that
 * begins there is located with `indexOf`; the overall shortest windows are
 * kept, in order of their start position.
 *
 * @param {string} haystack - text to search in
 * @param {string} needle - characters the substring must contain
 * @returns {string[]} all substrings of minimal length; empty when `needle`
 *   is empty or cannot be satisfied
 */
function findShortestSubstrings(haystack, needle) {
  // With nothing to look for, the search below would never hit its
  // "not found" exit and would loop forever.
  if (needle.length === 0) return [];

  // How many times each unique letter is required.
  const required = new Map();
  needle.split('').forEach(function (ch) {
    required.set(ch, (required.get(ch) || 0) + 1);
  });

  let shortestLength = haystack.length;
  let shortest = []; // storage for found substrings
  let startingPos = 0;

  for (;;) {
    let posStart = haystack.length;
    let posEnd = 0;

    for (const [ch, count] of required) {
      // Locate the first `count` occurrences of this letter from startingPos.
      let currentPos = startingPos;
      for (let left = count; left > 0; left--) {
        currentPos = haystack.indexOf(ch, currentPos);
        // A letter ran out: no window can start here or anywhere later.
        if (currentPos < 0) return shortest;
        posStart = Math.min(currentPos, posStart);
        posEnd = Math.max(currentPos, posEnd);
        currentPos++;
      }
    }

    const length = posEnd - posStart + 1;
    if (length < shortestLength) {
      shortest = [haystack.slice(posStart, posEnd + 1)];
      shortestLength = length;
    } else if (length === shortestLength) {
      shortest.push(haystack.slice(posStart, posEnd + 1));
    }
    startingPos = posStart + 1; // starting position for next iteration
  }
}

var shortestSubstrings = findShortestSubstrings(haystack, needle);
console.log(shortestSubstrings);
只是在面試中遇到這個問題作為編碼任務,並提出了另一種解決方案,(它不像上面的那樣最佳,但也許更容易理解)。
/**
 * Finds the smallest substring of N that contains every character of K
 * (counting repeats in K), using a sliding window over N.
 *
 * @param {string[]} strArr - two-element array `[N, K]`: the text to search
 *   and the characters the window must contain
 * @returns {string|Array} the leftmost shortest matching substring of N, or
 *   an empty array when no substring qualifies (kept for compatibility with
 *   existing callers)
 */
function MinWindowSubstring(strArr) {
  const [N, K] = strArr;

  // Outstanding count per wanted character; a negative value means the
  // current window holds more of that character than required.
  const outstanding = new Map();
  K.split('').forEach((character) => {
    outstanding.set(character, (outstanding.get(character) || 0) + 1);
  });

  let missing = K.length; // required characters not yet inside the window
  let left = 0;
  let bestStart = -1;
  let bestLength = Infinity;

  for (let right = 0; missing > 0 || right < N.length; right++) {
    if (right >= N.length) break;

    const incoming = N[right];
    if (outstanding.has(incoming)) {
      const count = outstanding.get(incoming);
      if (count > 0) missing--;
      outstanding.set(incoming, count - 1);
    }

    // While the window is complete, record it and shrink it from the left.
    while (missing === 0) {
      const windowLength = right - left + 1;
      // Strict comparison keeps the leftmost window among equally short ones.
      if (windowLength < bestLength) {
        bestLength = windowLength;
        bestStart = left;
      }
      const outgoing = N[left];
      if (outstanding.has(outgoing)) {
        const count = outstanding.get(outgoing) + 1;
        outstanding.set(outgoing, count);
        if (count > 0) missing++;
      }
      left++;
    }
  }

  if (bestStart === -1) return [];
  return N.substring(bestStart, bestStart + bestLength);
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.