簡體   English   中英

在較大的字符串中查找包含給定字母集的最小子字符串

[英]Find smallest substring containing a given set of letters in a larger string

假設您有以下字符串:

FJKAUNOJDCUTCRHBYDLXKEODVBWTYPTSHASQQFCPRMLDXIJMYPVOHBDUGSMBLMVUMMZYHULSUIZIMZTICQORLNTOVKVAMQTKHVRIFMNTSLYGHEHFAHWWATLYAPEXTHEPKJUGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNT
LDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFY
FFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQ
XBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGR
AMELUTEPYILBIUOCKKUUBJROQFTXMZRLXBAMHSDTEKRRIKZUFNLGTQAEUINMBPYTWXULQNIIRXHHGQDPENXAJNWXULFBNKBRINUMTRBFWBYVNKNKDFR

我試圖找到包含字母ABCDA的最小子字符串。

我嘗試了正則表達式方法。

// Collect every lazy match of A…B…C…D…A (letters in exactly that order),
// order the matches by length and print the shortest one.
const orderedMatches = str.match(/[A].*?[B].*?[C].*?[D].*?[A]/gm);
orderedMatches.sort((first, second) => first.length - second.length);
console.log(orderedMatches[0]);

這有效,但它只能找到 ABCDA 出現的字符串(按該順序)。 這意味着它不會找到字母按如下順序出現的子字符串: BCDAA

我正在嘗試更改我的正則表達式來解決這個問題。 如果不使用 | 逐一列出所有不同的排列情況,我該怎么做?

你不能。

讓我們考慮一個特殊情況:假設您要查找的字母是 A、A、B。 在你的正則表達式中的某個位置肯定會有一個 B。 但是, B 的左右兩部分是相互獨立的,因此不能相互引用。 B 右側的子表達式需要匹配多少個 A,取決於左側部分中已匹配的 A 的數量。 這對於正則表達式是不可能的,所以你必須展開所有不同的字母順序,而這樣的排列可能很多!

另一個說明該問題的流行示例是匹配左括號和右括號。 不可能編寫正則表達式斷言在給定字符串中,左括號序列后跟相同長度的右括號序列。 這樣做的原因是,為了計算括號,您需要一個堆棧機而不是有限狀態機,但正則表達式僅限於可以使用 FSM 匹配的模式。

也許不像使用正則表達式那么清楚(好吧,對我來說正則表達式從來都不是很清楚 :D),但你可以使用蠻力法(其實也不是那么蠻力)。

創建字符串“有效”點的索引(那些帶有你想要的字母的點)並用雙循環迭代它以獲得包含至少 5 個這些點的子字符串,檢查它們是否是有效的解決方案。 也許不是最有效的方法,但易於實施、理解,並且可能易於優化。

 // Brute-force search for the shortest substrings of `haystack` that contain
 // every letter of `needle` plus a second "A".
 //
 // Approach: record the index of every character of `haystack` that occurs in
 // `needle` (the "points"), then try pairs of points as the first and last
 // character of a candidate window and keep the shortest valid windows.
 //
 // `var` is kept for `haystack` and `needle` because another snippet on this
 // page declares the same top-level names.
 var haystack = ""; // the text to search
 var needle = "ABCD"; // distinct letters every solution must contain
 const size = haystack.length;
 let candidate_substring = "";
 let minimal_length = size;
 const solutions = [];
 const points = [];

 for (let i = 0; i < size; i++) {
   if (needle.includes(haystack[i])) points.push(i);
 }

 // A solution holds at least five wanted letters (A, B, C, D and a second A),
 // so a window has to span at least five points.
 const limit_i = points.length - 4;
 const limit_k = points.length;

 for (let i = 0; i < limit_i; i++) {
   for (let k = i + 4; k < limit_k; k++) {
     const window_length = points[k] - points[i] + 1;
     // `points` is ascending, so every later k only makes the window longer.
     if (window_length > minimal_length) break;

     candidate_substring = haystack.substring(points[i], points[k] + 1);
     if (is_valid(candidate_substring)) {
       solutions.push(candidate_substring);
       if (candidate_substring.length < minimal_length) {
         minimal_length = candidate_substring.length;
       }
     }
   }
 }

 document.write('<p>Solution length:' + minimal_length + '<p>');
 // `solutions` may still hold longer windows that were accepted before a
 // shorter one was found, hence the length filter.
 for (let i = 0; i < solutions.length; i++) {
   if (solutions[i].length <= minimal_length) {
     document.write('<p>Solution:' + solutions[i] + '<p>');
   }
 }

 /**
  * Checks whether a candidate window is a solution.
  *
  * @param {string} candidate_substring - window cut out of `haystack`
  * @returns {boolean} true when the window contains every letter of `needle`
  *   and at least two occurrences of "A"
  */
 function is_valid(candidate_substring) {
   // verify we've got all characters
   for (let j = 0; j < needle.length; j++) {
     if (candidate_substring.indexOf(needle.charAt(j)) < 0) return false;
   }
   // ...and verify we have two "A": first and last occurrence must differ
   if (candidate_substring.indexOf("A") === candidate_substring.lastIndexOf("A")) return false;
   return true;
 }

該算法不使用正則表達式,但也找到了兩種解決方案。

// Finds every shortest substring of `haystack` that contains all letters of
// `needle` (with multiplicity, in any order) and logs them as an array.
//
// `var` is kept for `haystack` and `needle` because another snippet on this
// page declares the same top-level names.
var haystack = 'FJKAUNOJDCUTCRHBYDLXKEODVBWTYPTSHASQQFCPRMLDXIJMYPVOHBDUGSMBLMVUMMZYHULSUIZIMZTICQORLNTOVKVAMQTKHVRIFMNTSLYGHEHFAHWWATLYAPEXTHEPKJUGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNTLDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFYFFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQXBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGRAMELUTEPYILBIUOCKKUUBJROQFTXMZRLXBAMHSDTEKRRIKZUFNLGTQAEUINMBPYTWXULQNIIRXHHGQDPENXAJNWXULFBNKBRINUMTRBFWBYVNKNKDFR';
var needle = 'ABCDA'; // the order of letters doesn't matter

// Required number of occurrences for each distinct letter of `needle`.
const letters = {};
needle.split('').forEach(function (ch) {
  letters[ch] = (letters[ch] || 0) + 1;
});
const letterKeys = Object.keys(letters); // unique letters

let shortestSubstringLength = haystack.length;
let shortestSubstrings = []; // storage for found substrings

let startingPos = 0;
// With an empty needle there is nothing to look for; starting in the
// "not found" state skips the search instead of looping forever.
let notFound = letterKeys.length === 0;

while (!notFound) {
  // Fresh per-iteration copy of the required counts (values are plain numbers,
  // so a shallow copy is enough).
  const lettersLeft = { ...letters };
  let posStart = haystack.length;
  let posEnd = 0;

  // For every letter take its first required occurrences at or after
  // `startingPos`; the smallest and largest index used bound the window.
  for (const ch of letterKeys) {
    let currentPos = startingPos;
    while (!notFound && lettersLeft[ch] > 0) {
      currentPos = haystack.indexOf(ch, currentPos);
      if (currentPos >= 0) {
        lettersLeft[ch]--;
        posStart = Math.min(currentPos, posStart);
        posEnd = Math.max(currentPos, posEnd);
        currentPos++;
      } else {
        notFound = true;
      }
    }
  }
  if (notFound) break;

  const length = posEnd - posStart + 1;
  startingPos = posStart + 1; // starting position for next iteration

  if (length < shortestSubstringLength) {
    // strictly shorter window: discard everything collected so far
    shortestSubstrings = [haystack.slice(posStart, posEnd + 1)];
    shortestSubstringLength = length;
  } else if (length === shortestSubstringLength) {
    shortestSubstrings.push(haystack.slice(posStart, posEnd + 1));
  }
}

console.log(shortestSubstrings);

只是在面試中遇到這個問題作為編碼任務,並提出了另一種解決方案,(它不像上面的那樣最佳,但也許更容易理解)。

/**
 * Finds the shortest substring of N that contains every character of K,
 * counting duplicates, in any order.
 *
 * Uses a sliding window: the right edge advances one character at a time and,
 * whenever the window holds all required characters, the left edge is moved
 * forward as far as possible while recording the shortest window seen.
 *
 * @param {string[]} strArr - two-element array `[N, K]`: the string to search
 *   and the characters that must be present
 * @returns {string|Array} the leftmost shortest matching substring, or an
 *   empty array when K is empty or no substring of N contains all of K
 */
function MinWindowSubstring(strArr) {
  const [N, K] = strArr;

  // How many times each distinct character has to occur inside the window.
  const required = new Map();
  for (const character of K.split('')) {
    required.set(character, (required.get(character) || 0) + 1);
  }
  if (required.size === 0) {
    return [];
  }

  const inWindow = new Map(); // occurrences of required characters in the window
  let satisfied = 0; // distinct characters whose quota is currently met
  let left = 0;
  let bestStart = -1;
  let bestLength = Infinity;

  for (let right = 0; right < N.length; right++) {
    const entering = N[right];
    if (required.has(entering)) {
      const count = (inWindow.get(entering) || 0) + 1;
      inWindow.set(entering, count);
      // Count the character once, at the moment its quota is reached.
      if (count === required.get(entering)) {
        satisfied++;
      }
    }

    // Shrink from the left for as long as the window still covers K.
    while (satisfied === required.size) {
      const windowLength = right - left + 1;
      // Strict comparison keeps the leftmost window among equally short ones.
      if (windowLength < bestLength) {
        bestLength = windowLength;
        bestStart = left;
      }

      const leaving = N[left];
      if (required.has(leaving)) {
        const count = inWindow.get(leaving) - 1;
        inWindow.set(leaving, count);
        if (count < required.get(leaving)) {
          satisfied--;
        }
      }
      left++;
    }
  }

  return bestStart === -1 ? [] : N.substring(bestStart, bestStart + bestLength);
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM