簡體   English   中英

strstr 是否有反向函數

[英]Is there a reverse function for strstr

我試圖找到一個與strstr類似的函數,它搜索從字符串的末尾開始到開頭的子字符串。

標准 C 庫沒有“反向 strstr”函數,因此您必須自己查找或編寫。

我想出了幾個自己的解決方案,並在該線程中添加了一些測試和基准測試代碼以及​​其他功能。 對於那些好奇的人,在我的筆記本電腦(Ubuntu karmic,amd64 架構)上運行的輸出如下所示:

$ gcc -O2 --std=c99 strrstr.c && ./a.out
#1 0.123 us last_strstr
#2 0.440 us theo
#3 0.460 us cordelia
#4 1.690 us digitalross
#5 7.700 us backwards_memcmp
#6 8.600 us sinan

您的結果可能會有所不同,並且根據您的編譯器和庫,結果的順序也可能有所不同。

要從字符串的開頭獲取匹配項的偏移量(索引),請使用指針算法:

char *match = last_strstr(haystack, needle);
ptrdiff_t index;
if (match != NULL)
    index = match - haystack;
else
    index = -1;

現在,落葉松(請注意,這純粹是用 C 語言編寫的,我不太了解 C++,無法給出答案):

#include <string.h>
#include <stdlib.h>

/* By liw. */
static char *last_strstr(const char *haystack, const char *needle)
{
    if (*needle == '\0')
        return (char *) haystack;

    char *result = NULL;
    for (;;) {
        char *p = strstr(haystack, needle);
        if (p == NULL)
            break;
        result = p;
        haystack = p + 1;
    }

    return result;
}


/* By liw. */
static char *backwards_memcmp(const char *haystack, const char *needle)
{
    size_t haylen = strlen(haystack);

    if (*needle == '\0')
        return (char *) haystack;

    size_t needlelen = strlen(needle);
    if (needlelen > haylen)
        return NULL;

    const char *p = haystack + haylen - needlelen;
    for (;;) {
        if (memcmp(p, needle, needlelen) == 0)
            return (char *) p;
        if (p == haystack)
            return NULL;
        --p;
    }
}


/* From http://stuff.mit.edu/afs/sipb/user/cordelia/Diplomacy/mapit/strrstr.c
 */
static char *cordelia(const char *s1, const char *s2)
{
 const char *sc1, *sc2, *psc1, *ps1;

 if (*s2 == '\0')
  return((char *)s1);

 ps1 = s1 + strlen(s1);

 while(ps1 != s1) {
  --ps1;
  for (psc1 = ps1, sc2 = s2; ; )
   if (*(psc1++) != *(sc2++))
    break;
   else if (*sc2 == '\0')
    return ((char *)ps1);
 }
 return ((char *)NULL);
}


/* From http://stackoverflow.com/questions/1634359/
   is-there-a-reverse-fn-for-strstr/1634398#1634398 (DigitalRoss). */
static char *reverse(const char *s)
{
  if (s == NULL)
    return NULL;
  size_t i, len = strlen(s);
  char *r = malloc(len + 1);

  for(i = 0; i < len; ++i)
    r[i] = s[len - i - 1];
  r[len] = 0;
  return r;
}
char *digitalross(const char *s1, const char *s2)
{
  size_t  s1len = strlen(s1);
  size_t  s2len = strlen(s2);
  const char *s;

  if (s2len == 0)
    return (char *) s1;
  if (s2len > s1len)
    return NULL;
  for (s = s1 + s1len - s2len; s >= s1; --s)
    if (strncmp(s, s2, s2len) == 0)
      return (char *) s;
  return NULL;
}


/* From http://stackoverflow.com/questions/1634359/
  is-there-a-reverse-fn-for-strstr/1634487#1634487 (Sinan Ünür). */

char *sinan(const char *source, const char *target)
{
    const char *current;
    const char *found = NULL;

    if (*target == '\0')
        return (char *) source;

    size_t target_length = strlen(target);
    current = source + strlen(source) - target_length;

    while ( current >= source ) {
        if ( (found = strstr(current, target)) ) {
            break;
        }
        current -= 1;
    }

    return (char *) found;
}


/* From http://stackoverflow.com/questions/1634359/
  is-there-a-reverse-fn-for-strstr/1634441#1634441 (Theo Spears). */
char *theo(const char* haystack, const char* needle)
{
  size_t needle_length = strlen(needle);
  const char* haystack_end = haystack + strlen(haystack) - needle_length;
  const char* p;
  size_t i;

  if (*needle == '\0')
    return (char *) haystack;
  for(p = haystack_end; p >= haystack; --p)
  {
    for(i = 0; i < needle_length; ++i) {
      if(p[i] != needle[i])
        goto next;
    }
    return (char *) p;

    next:;
  }
  return 0;
}


/*
 * The rest of this code is a test and timing harness for the various
 * implementations above.
 */


#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>


/* Check that the given function works. */
static bool works(const char *name, char *(*func)(const char *, const char *))
{
    struct {
        const char *haystack;
        const char *needle;
        int offset;
    } tests[] = {
        { "", "", 0 },
        { "", "x", -1 },
        { "x", "", 0 },
        { "x", "x", 0 },
        { "xy", "x", 0 },
        { "xy", "y", 1 },
        { "xyx", "x", 2 },
        { "xyx", "y", 1 },
        { "xyx", "z", -1 },
        { "xyx", "", 0 },
    };
    const int num_tests = sizeof(tests) / sizeof(tests[0]);
    bool ok = true;

    for (int i = 0; i < num_tests; ++i) {
        int offset;
        char *p = func(tests[i].haystack, tests[i].needle);
        if (p == NULL)
            offset = -1;
        else
            offset = p - tests[i].haystack;
        if (offset != tests[i].offset) {
            fprintf(stderr, "FAIL %s, test %d: returned %d, haystack = '%s', "
                            "needle = '%s', correct return %d\n",
                            name, i, offset, tests[i].haystack, tests[i].needle,
                            tests[i].offset);
            ok = false;
        }
    }
    return ok;
}


/* Dummy function for calibrating the measurement loop. */
static char *dummy(const char *haystack, const char *needle)
{
    return NULL;
}


/* Measure how long it will take to call the given function with the
   given arguments the given number of times. Return clock ticks. */
static clock_t repeat(char *(*func)(const char *, const char *),
                       const char *haystack, const char *needle,
                       long num_times)
{
    clock_t start, end;

    start = clock();
    for (long i = 0; i < num_times; ++i) {
        func(haystack, needle);
    }
    end = clock();
    return end - start;
}


static clock_t min(clock_t a, clock_t b)
{
    if (a < b)
        return a;
    else
        return b;
}


/* Measure the time to execute one call of a function, and return the
   number of CPU clock ticks (see clock(3)). */
static double timeit(char *(*func)(const char *, const char *))
{
    /* The arguments for the functions to be measured. We deliberately
       choose a case where the haystack is large and the needle is in
       the middle, rather than at either end. Obviously, any test data
       will favor some implementations over others. This is the weakest
       part of the benchmark. */

    const char haystack[] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "b"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
                            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    const char needle[] = "b";

    /* First we find out how many repeats we need to do to get a sufficiently
       long measurement time. These functions are so fast that measuring
       only a small number of repeats will give wrong results. However,
       we don't want to do a ridiculously long measurement, either, so 
       start with one repeat and multiply it by 10 until the total time is
       about 0.2 seconds. 

       Finally, we measure the dummy function the same number of times
       to get rid of the call overhead.

       */

    clock_t mintime = 0.2 * CLOCKS_PER_SEC;
    clock_t clocks;
    long repeats = 1;
    for (;;) {
        clocks = repeat(func, haystack, needle, repeats);
        if (clocks >= mintime)
            break;
        repeats *= 10;
    }

    clocks = min(clocks, repeat(func, haystack, needle, repeats));
    clocks = min(clocks, repeat(func, haystack, needle, repeats));

    clock_t dummy_clocks;

    dummy_clocks = repeat(dummy, haystack, needle, repeats);
    dummy_clocks = min(dummy_clocks, repeat(dummy, haystack, needle, repeats));
    dummy_clocks = min(dummy_clocks, repeat(dummy, haystack, needle, repeats));

    return (double) (clocks - dummy_clocks) / repeats / CLOCKS_PER_SEC;
}


/* Array of all functions. */
struct func {
    const char *name;
    char *(*func)(const char *, const char *);
    double secs;
} funcs[] = {
#define X(func) { #func, func, 0 }
    X(last_strstr),
    X(backwards_memcmp),
    X(cordelia),
    X(digitalross),
    X(sinan),
    X(theo),
#undef X
};
const int num_funcs = sizeof(funcs) / sizeof(funcs[0]);


/* Comparison function for qsort, comparing timings. */
int funcmp(const void *a, const void *b)
{
    const struct func *aa = a;
    const struct func *bb = b;

    if (aa->secs < bb->secs)
        return -1;
    else if (aa->secs > bb->secs)
        return 1;
    else
        return 0;
}


int main(void)
{

    bool ok = true;
    for (int i = 0; i < num_funcs; ++i) {
        if (!works(funcs[i].name, funcs[i].func)) {
            fprintf(stderr, "%s does not work\n", funcs[i].name);            
            ok = false;
        }
    }
    if (!ok)
        return EXIT_FAILURE;

    for (int i = 0; i < num_funcs; ++i)
        funcs[i].secs = timeit(funcs[i].func);
    qsort(funcs, num_funcs, sizeof(funcs[0]), funcmp);
    for (int i = 0; i < num_funcs; ++i)
        printf("#%d %.3f us %s\n", i+1, funcs[i].secs * 1e6, funcs[i].name);

    return 0;
}

我不知道一個。 C 的優點之一是,如果您編寫自己的函數,它與庫函數一樣快速和高效。 (在許多其他語言中完全不是這種情況。)

您可以反轉字符串和子字符串,然后進行搜索。

最后,當字符串庫不夠好時,人們經常做的另一件事是轉向正則表達式。

好的,我寫了reverse()rstrstr() ,如果幸運的話,它們可能會起作用。 擺脫 C++ 的__restrict 您可能還希望將參數設為const ,但隨后您將需要轉換返回值。 要回答您的評論問題,您只需從中減去原始字符串指針即可從子字符串的地址中獲取索引。 行:

#include <stdlib.h>
#include <string.h>

char *reverse(const char * __restrict const s)
{
  if (s == NULL)
    return NULL;
  size_t i, len = strlen(s);
  char *r = malloc(len + 1);

  for(i = 0; i < len; ++i)
    r[i] = s[len - i - 1];
  r[len] = 0;
  return r;
}

char *rstrstr(char *__restrict s1, char *__restrict s2)
{
  size_t  s1len = strlen(s1);
  size_t  s2len = strlen(s2);
  char *s;

  if (s2len > s1len)
    return NULL;
  for (s = s1 + s1len - s2len; s >= s1; --s)
    if (strncmp(s, s2, s2len) == 0)
      return s;
  return NULL;
}

如果你可以使用 C++,你可以像這樣搜索字符串:

std::string::iterator found=std::search(haystack.rbegin(), haystack.rend(), needle.rbegin(), needle.rend()).base();
// => yields haystack.begin() if not found, otherwise, an iterator past-the end of the occurence of needle

一種可能的(如果不是完全優雅的)實現可能如下所示:

#include "string.h"

const char* rstrstr(const char* haystack, const char* needle)
{
  int needle_length = strlen(needle);
  const char* haystack_end = haystack + strlen(haystack) - needle_length;
  const char* p;
  size_t i;

  for(p = haystack_end; p >= haystack; --p)
  {
    for(i = 0; i < needle_length; ++i) {
      if(p[i] != needle[i])
        goto next;
    }
    return p;

    next:;
  }
  return 0;
}

不。這是 C++ std::string 類具有明顯優勢的地方之一——除了std::string::find() ,還有std::string::rfind()

我認為你仍然可以使用庫函數來做到這一點。

1.使用strrev函數反轉字符串。

2.使用strstr函數做任何你想做的事。

3.您可以通過從原始字符串的長度中減去搜索字符串的起始索引來找到搜索字符串的起始索引(反向)。

盡管不是標准的,但strrstr得到了廣泛的支持,並且完全符合您的要求。

是否有 C 庫函數來查找字符串中子字符串最后一次出現的索引?

編輯:正如@hhafez 在下面的評論中指出的那樣,我為此發布的第一個解決方案效率低下且不正確(因為我通過target_length提前了指針,這在我的愚蠢測試中運行良好)。 您可以在編輯歷史記錄中找到該版本。

這是一個從最后開始並返回的實現:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const char *
findlast(const char *source, const char *target) {
    const char *current;
    const char *found = NULL;

    size_t target_length = strlen(target);
    current = source + strlen(source) - target_length;

    while ( current >= source ) {
        if ( (found = strstr(current, target)) ) {
            break;
        }
        current -= 1;
    }

    return found;
}

int main(int argc, char *argv[]) {
    if ( argc != 3 ) {
        fputs("invoke with source and search strings as arguments", stderr);
        return EXIT_FAILURE;
    }

    const char *found = findlast(argv[1], argv[2]);

    if ( found ) {
        printf("Last occurence of '%s' in '%s' is at offset %d\n",
                argv[2], argv[1], found - argv[1]
                );
    }
    return 0;
}

輸出:

C:\Temp> st "this is a test string that tests this" test
Last occurence of 'test' in 'this is a test string that tests this' is 
at offset 27

我不相信 c 字符串庫中存在,但是編寫自己的字符串會很簡單,在一種情況下,您知道字符串的長度或者它已正確終止。

標准 C 庫中沒有。 您可以在網上找到一個,或者您可能需要自己編寫。

長話短說:

不 - C 庫中沒有任何功能可以滿足您的需求。

但正如其他人指出的那樣:編寫這樣的函數不是火箭科學......

這是一個。 測試它是一個練習,我會留給你:)

感謝您的回答! 還有一種來自 MSDN 論壇的方法。 http://social.msdn.microsoft.com/Forums/en-US/vclanguage/thread/ed0f6ef9-8911-4879-accb-b3c778a09d94

char * strrstr(char *_Str, char *_SubStr){
    char *returnPointer, *p;

    //find 1st occurence. if not found, return NULL
    if ( (p=strstr(_Str, _SubStr))==NULL)
        return NULL;

    //loop around until no more occurences
    do{
        returnPointer=p;
        ++p;
    }while(p=strstr(p, _SubStr));

    return returnPointer;
}

為此,您可以使用標准算法 std::find_end。 例如

    char s[] = "What is the last word last";
    char t[] = "last";

    std::cout << std::find_end( s, s + sizeof( s ) - 1, t, t + sizeof( t ) -1 )
              << std::endl;

這是我能想到的最小的簡單植入。 與此函數的其他實現不同,它避免了像 user3119703 這樣的其他人所擁有的初始 strstr 調用。

char * lastStrstr(const char * haystack,const char * needle){
    char*temp=haystack,*before=0;
    while(temp=strstr(temp,needle)) before=temp++;
    return before;
}
char* strrstr(char * _Str, const char * _SubStr)
{
    const BYTE EQUAL=0;
    int i=0, src_len = strlen(_Str), find_len = strlen(_SubStr),
        tail_count=0;

    for(i=src_len; i>-1; i--)
    {
        if(_Str[i] == _SubStr[0] && tail_count >= find_len)
        {
            if(strncmp(&_Str[i], _SubStr, find_len) == EQUAL)
            {
                return &_Str[i];
            }
        }
        tail_count++;
    }
    return NULL;    
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM