简体   繁体   中英

How to find indices of all occurrences of one string in another in JavaScript?

I'm trying to find the positions of all occurrences of a string in another string, case-insensitive.

For example, given the string:

I learned to play the Ukulele in Lebanon.

and the search string le , I want to obtain the array:

[2, 25, 27, 33]

Both strings will be variables - ie, I can't hard-code their values.

I figured that this was an easy task for regular expressions, but after struggling for a while to find one that would work, I've had no luck.

I found this example of how to accomplish this using .indexOf() , but surely there has to be a more concise way to do it?

var str = "I learned to play the Ukulele in Lebanon."
var regex = /le/gi, result, indices = [];
while ( (result = regex.exec(str)) ) {
    indices.push(result.index);
}

UPDATE

I failed to spot in the original question that the search string needs to be a variable. I've written another version to deal with this case that uses indexOf , so you're back to where you started. As pointed out by Wrikken in the comments, to do this for the general case with regular expressions you would need to escape special regex characters, at which point I think the regex solution becomes more of a headache than it's worth.

 function getIndicesOf(searchStr, str, caseSensitive) { var searchStrLen = searchStr.length; if (searchStrLen == 0) { return []; } var startIndex = 0, index, indices = []; if (!caseSensitive) { str = str.toLowerCase(); searchStr = searchStr.toLowerCase(); } while ((index = str.indexOf(searchStr, startIndex)) > -1) { indices.push(index); startIndex = index + searchStrLen; } return indices; } var indices = getIndicesOf("le", "I learned to play the Ukulele in Lebanon."); document.getElementById("output").innerHTML = indices + "";
 <div id="output"></div>

Here is regex free version:

function indexes(source, find) {
  if (!source) {
    return [];
  }
  // if find is empty string return all indexes.
  if (!find) {
    // or shorter arrow function:
    // return source.split('').map((_,i) => i);
    return source.split('').map(function(_, i) { return i; });
  }
  var result = [];
  for (i = 0; i < source.length; ++i) {
    // If you want to search case insensitive use 
    // if (source.substring(i, i + find.length).toLowerCase() == find) {
    if (source.substring(i, i + find.length) == find) {
      result.push(i);
    }
  }
  return result;
}

indexes("I learned to play the Ukulele in Lebanon.", "le")

EDIT : and if you want to match strings like 'aaaa' and 'aa' to find [0, 2] use this version:

function indexes(source, find) {
  if (!source) {
    return [];
  }
  if (!find) {
      return source.split('').map(function(_, i) { return i; });
  }
  var result = [];
  var i = 0;
  while(i < source.length) {
    if (source.substring(i, i + find.length) == find) {
      result.push(i);
      i += find.length;
    } else {
      i++;
    }
  }
  return result;
}

You sure can do this!

//make a regular expression out of your needle
var needle = 'le'
var re = new RegExp(needle,'gi');
var haystack = 'I learned to play the Ukulele';

var results = new Array();//this is the results you want
while (re.exec(haystack)){
  results.push(re.lastIndex);
}

Edit: learn to spell RegExp

Also, I realized this isn't exactly what you want, as lastIndex tells us the end of the needle not the beginning, but it's close - you could push re.lastIndex-needle.length into the results array...

Edit: adding link

@Tim Down's answer uses the results object from RegExp.exec(), and all my Javascript resources gloss over its use (apart from giving you the matched string). So when he uses result.index , that's some sort of unnamed Match Object. In the MDC description of exec , they actually describe this object in decent detail.

One liner using String.protype.matchAll (ES2020):

[...sourceStr.matchAll(new RegExp(searchStr, 'gi'))].map(a => a.index)

Using your values:

const sourceStr = 'I learned to play the Ukulele in Lebanon.';
const searchStr = 'le';
const indexes = [...sourceStr.matchAll(new RegExp(searchStr, 'gi'))].map(a => a.index);
console.log(indexes); // [2, 25, 27, 33]

If you're worried about doing a spread and a map() in one line, I ran it with a for...of loop for a million iterations (using your strings). The one liner averages 1420ms while the for...of averages 1150ms on my machine. That's not an insignificant difference, but the one liner will work fine if you're only doing a handful of matches.

See matchAll on caniuse

If you just want to find the position of all matches I'd like to point you to a little hack:

 var haystack = 'I learned to play the Ukulele in Lebanon.', needle = 'le', splitOnFound = haystack.split(needle).map(function (culm) { return this.pos += culm.length + needle.length }, {pos: -needle.length}).slice(0, -1); // {pos: ...} – Object wich is used as this console.log(splitOnFound);

It might not be applikable if you have a RegExp with variable length but for some it might be helpful.

This is case sensitive. For case insensitivity use String.toLowerCase function before.

I am a bit late to the party (by almost 10 years, 2 months), but one way for future coders is to do it using while loop and indexOf()

let haystack = "I learned to play the Ukulele in Lebanon.";
let needle = "le";
let pos = 0; // Position Ref
let result = []; // Final output of all index's.
let hayStackLower = haystack.toLowerCase();

// Loop to check all occurrences 
while (hayStackLower.indexOf(needle, pos) != -1) {
  result.push(hayStackLower.indexOf(needle , pos));
  pos = hayStackLower.indexOf(needle , pos) + 1;
}

console.log("Final ", result); // Returns all indexes or empty array if not found

Here is a simple code snippet:

 function getIndexOfSubStr(str, searchToken, preIndex, output) { var result = str.match(searchToken); if (result) { output.push(result.index +preIndex); str=str.substring(result.index+searchToken.length); getIndexOfSubStr(str, searchToken, preIndex, output) } return output; } var str = "my name is 'xyz' and my school name is 'xyz' and my area name is 'xyz' "; var searchToken ="my"; var preIndex = 0; console.log(getIndexOfSubStr(str, searchToken, preIndex, []));

I would recommend Tim's answer. However, this comment by @blazs states "Suppose searchStr=aaa and that str=aaaaaa . Then instead of finding 4 occurences your code will find only 2 because you're making skips by searchStr.length in the loop.", which is true by looking at Tim's code, specifically this line here: startIndex = index + searchStrLen; Tim's code would not be able to find an instance of the string that's being searched that is within the length of itself. So, I've modified Tim's answer:

 function getIndicesOf(searchStr, str, caseSensitive) { var startIndex = 0, index, indices = []; if (!caseSensitive) { str = str.toLowerCase(); searchStr = searchStr.toLowerCase(); } while ((index = str.indexOf(searchStr, startIndex)) > -1) { indices.push(index); startIndex = index + 1; } return indices; } var searchStr = prompt("Enter a string."); var str = prompt("What do you want to search for in the string?"); var indices = getIndicesOf(str, searchStr); document.getElementById("output").innerHTML = indices + "";
 <div id="output"></div>

Changing it to + 1 instead of + searchStrLen will allow the index 1 to be in the indices array if I have an str of aaaaaa and a searchStr of aaa .

PS If anyone would like comments in the code to explain how the code works, please say so, and I'll be happy to respond to the request.

Follow the answer of @jcubic, his solution caused a small confusion for my case
For example var result = indexes('aaaa', 'aa') will return [0, 1, 2] instead of [0, 2]
So I updated a bit his solution as below to match my case

function indexes(text, subText, caseSensitive) {
    var _source = text;
    var _find = subText;
    if (caseSensitive != true) {
        _source = _source.toLowerCase();
        _find = _find.toLowerCase();
    }
    var result = [];
    for (var i = 0; i < _source.length;) {
        if (_source.substring(i, i + _find.length) == _find) {
            result.push(i);
            i += _find.length;  // found a subText, skip to next position
        } else {
            i += 1;
        }
    }
    return result;
}

Thanks for all the replies. I went through all of them and came up with a function that gives the first an last index of each occurrence of the 'needle' substring . I am posting it here in case it will help someone.

Please note, it is not the same as the original request for only the beginning of each occurrence. It suits my usecase better because you don't need to keep the needle length.

function findRegexIndices(text, needle, caseSensitive){
  var needleLen = needle.length,
    reg = new RegExp(needle, caseSensitive ? 'gi' : 'g'),
    indices = [],
    result;

  while ( (result = reg.exec(text)) ) {
    indices.push([result.index, result.index + needleLen]);
  }
  return indices
}

Check this solution which will able to find same character string too, let me know if something missing or not right.

 function indexes(source, find) { if (!source) { return []; } if (!find) { return source.split('').map(function(_, i) { return i; }); } source = source.toLowerCase(); find = find.toLowerCase(); var result = []; var i = 0; while(i < source.length) { if (source.substring(i, i + find.length) == find) result.push(i++); else i++ } return result; } console.log(indexes('aaaaaaaa', 'aaaaaa')) console.log(indexes('aeeaaaaadjfhfnaaaaadjddjaa', 'aaaa')) console.log(indexes('wordgoodwordgoodgoodbestword', 'wordgood')) console.log(indexes('I learned to play the Ukulele in Lebanon.', 'le'))

Here's my code (using search and slice methods)

 let s = "I learned to play the Ukulele in Lebanon" let sub = 0 let matchingIndex = [] let index = s.search(/le/i) while( index >= 0 ){ matchingIndex.push(index+sub); sub = sub + ( s.length - s.slice( index+1 ).length ) s = s.slice( index+1 ) index = s.search(/le/i) } console.log(matchingIndex)

This is what I usually use to get a string index also according to its position.

I pass following parameters:

search : the string where to search for

find : the string to find

position ('all' by default): the position by which the find string appears in search string

(if 'all' it returns the complete array of indexes)

(if 'last' it returns the last position)

function stringIndex (search, find, position = "all") {
    
    var currIndex = 0, indexes = [], found = true;
    
    while (found) {        
        var searchIndex = search.indexOf(find);
        if (searchIndex > -1) {
            currIndex += searchIndex + find.length; 
            search = search.substr (searchIndex + find.length);
            indexes.push (currIndex - find.length);
        } else found = false; //no other string to search for - exit from while loop   
    }
    
    if (position == 'all') return indexes;
    if (position > indexes.length -1) return [];
    
    position = (position == "last") ? indexes.length -1 : position;
    
    return indexes[position];        
}

//Example:
    
var myString = "Joe meets Joe and together they go to Joe's house";
console.log ( stringIndex(myString, "Joe") ); //0, 10, 38
console.log ( stringIndex(myString, "Joe", 1) ); //10
console.log ( stringIndex(myString, "Joe", "last") ); //38
console.log ( stringIndex(myString, "Joe", 5) ); //[]

Hi friends this is just another way of finding indexes of matching phrase using reduce and a helper method. Of course RegExp is more convenient and perhaps is internally implemented somehow like this. I hope you find it useful.

 function findIndexesOfPhraseWithReduce(text, phrase) { //convert text to array so that be able to manipulate. const arrayOfText = [...text]; /* this function takes the array of characters and the search phrase and start index which comes from reduce method and calculates the end with length of the given phrase then slices and joins characters and compare it whith phrase. and returns True Or False */ function isMatch(array, phrase, start) { const end = start + phrase.length; return (array.slice(start, end).join('')).toLowerCase() === phrase.toLowerCase(); } /* here we reduce the array of characters and test each character with isMach function which takes "current index" and matches the phrase with the subsequent character which starts from current index and ends at the last character of phrase(the length of phrase). */ return arrayOfText.reduce((acc, item, index) => isMatch(arrayOfText, phrase, index) ? [...acc, index] : acc, []); } findIndexesOfPhraseWithReduce("I learned to play the Ukulele in Lebanon.", "le"); 

 function findIndexesOfPhraseWithReduce(text, phrase) { const arrayOfText = [...text]; function isMatch(array, phrase, start) { const end = start + phrase.length; return (array.slice(start, end).join('')).toLowerCase() === phrase.toLowerCase(); } return arrayOfText.reduce((acc, item, index) => isMatch(arrayOfText, phrase, index) ? [...acc, index] : acc, []); } console.log(findIndexesOfPhraseWithReduce("I learned to play the Ukulele in Lebanon.", "le"));

const findAllOccurrences = (str, substr) => {
  str = str.toLowerCase();
  
  let result = [];

  let idx = str.indexOf(substr)
  
  while (idx !== -1) {
    result.push(idx);
    idx = str.indexOf(substr, idx+1);
  }
  return result;
}

console.log(findAllOccurrences('I learned to play the Ukulele in Lebanon', 'le'));
function countInString(searchFor,searchIn){

 var results=0;
 var a=searchIn.indexOf(searchFor)

 while(a!=-1){
   searchIn=searchIn.slice(a*1+searchFor.length);
   results++;
   a=searchIn.indexOf(searchFor);
 }

return results;

}

the below code will do the job for you :

function indexes(source, find) {
  var result = [];
  for(i=0;i<str.length; ++i) {
    // If you want to search case insensitive use 
    // if (source.substring(i, i + find.length).toLowerCase() == find) {
    if (source.substring(i, i + find.length) == find) {
      result.push(i);
    }
  }
  return result;
}

indexes("hello, how are you", "ar")

Use String.prototype.match .

Here is an example from the MDN docs itself:

var str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
var regexp = /[A-E]/gi;
var matches_array = str.match(regexp);

console.log(matches_array);
// ['A', 'B', 'C', 'D', 'E', 'a', 'b', 'c', 'd', 'e']

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM