简体   繁体   中英

Is it possible to use strtok function on only non-overlapping strings?

In the code below there are two instances of the substring "on" in the string data. But is it possible to apply strtok only to the substring "on" which is non-overlapping (i.e. is not part of another word)? If yes, can someone please tell me how, and what I am doing wrong in the code below?

#include<stdio.h>
#include<string.h>
#include<ctype.h>

int main()
{  
  char data[50]="Jason could you please turn on the TV";
  char delimiter[5]="on";

  char *ptr,*pointer,*pa,*p,*pb[10];
  int i=0,j=0,k=0,count=0;

  p=data;
  pointer=data;

  while((*pointer!='\0')&&(pointer=strstr(pointer,delimiter)))
  {
    pa=pointer+strlen(delimiter);
    ptr=(--pointer);

    while((isspace(*ptr))&&(isspace(*pa)))
    {
      pb[count]=strtok(ptr,delimiter);
      printf("%s\n",pb[count]);
      count++;
      break;

     } 

      pointer++;
     (*pointer)++;

  }   


}

strspn and strcspn can be used to parse a string for a matching word.
strtok will split the string at each occurrence of the individual characters in the delimiter. This is not well suited for what you appear to want to do.

#include <stdio.h>
#include <string.h>

/*
 * Walk data one whitespace-separated word at a time, print every word and
 * report each word that is exactly equal to delimiter.
 */
int main() {
    char data[50]="Jason could you please turn on the TV";
    char delimiter[5]="on";
    char *parse = data;
    size_t space = 0;
    size_t span = 0;
    size_t length = strlen ( delimiter);//constant for the whole loop

    while ( *parse){//parse not pointing to zero terminator
        space = strspn ( parse, " \n\t");//leading whitespace
        parse += space;//advance past whitespace
        span = strcspn ( parse, " \n\t");//not whitespace
        if ( span) {
            printf("word is: %.*s\n", (int)span, parse);//prints span number of characters
            //strncmp alone compares only span characters, so a word that is
            //a prefix of delimiter (e.g. "o") would match; the lengths must
            //be equal too. An empty span (trailing whitespace) is never a match.
            if ( span == length && 0 == strncmp ( delimiter, parse, span)) {
                printf ( "\tword matches delimiter: %s\n", delimiter);//found match
            }
        }
        parse += span;//advance past non whitespace for next word
    }
    return 0;
}

EDIT:

#include <stdio.h>
#include <string.h>

/*
 * Split data in two at the first whole word equal to delimiter.
 * The first character of the matched word is overwritten with '\0' so that
 * data ends there; parse is left on the first non-whitespace character
 * after the word. If no word matches, parse ends up on the terminator and
 * the second string printed is empty.
 */
int main() {
    char data[50]="Jason could you please turn on the TV";
    char delimiter[5]="on";
    char *parse = data;
    size_t space = 0;
    size_t span = 0;
    size_t length = strlen ( delimiter);//constant for the whole loop

    while ( *parse){//parse not pointing to zero terminator
        space = strspn ( parse, " \n\t");//leading whitespace
        parse += space;//advance past whitespace
        span = strcspn ( parse, " \n\t");//not whitespace
        if ( span) {
            printf("word is: %.*s\n", (int)span, parse);//prints span number of characters
            //strncmp alone compares only span characters, so a word that is
            //a prefix of delimiter (e.g. "o") would match; require equal length
            if ( span == length && 0 == strncmp ( delimiter, parse, span)) {
                printf ( "\tword matches delimiter: %s\n", delimiter);//found match
                *parse = 0;//terminate the leading part at the matched word
                parse += span;//advance past delimiter
                space = strspn ( parse, " \n\t");//leading whitespace
                parse += space;//advance past whitespace
                break;
            }
        }
        parse += span;//advance past non whitespace for next word
    }
    printf ( "\n\nsplit strings:\n%s\n%s\n", data, parse);
    return 0;
}

The basis could be wrapped in a function. This would divide the original string into as many sub-strings as required by the delimiting word. None of the sub-strings are stored but with modification that can be achieved.

#include <stdio.h>
#include <string.h>

/*
 * Find the first whitespace-delimited word in str that is exactly equal
 * to word.
 *
 * str   string to scan; it is not modified
 * word  word to look for (an empty word never matches)
 * stop  on a match, receives the offset from str to the start of the
 *       matched word; left untouched when there is no match
 *
 * Returns a pointer to the first non-whitespace character after the matched
 * word (possibly the terminating zero), or NULL when no word matches.
 * Whitespace is space, newline and tab.
 */
char *strwordsep ( char *str, char *word, size_t *stop) {
    char *parse = str;
    size_t space = 0;
    size_t span = 0;
    size_t length = strlen ( word);//constant for the whole scan

    while ( *parse){//parse not pointing to zero terminator
        space = strspn ( parse, " \n\t");//leading whitespace
        parse += space;//advance past whitespace
        span = strcspn ( parse, " \n\t");//not whitespace
        //strncmp alone compares only span characters, so a token that is a
        //prefix of word (e.g. "o" for "on") would match; require equal length
        if ( span && span == length && 0 == strncmp ( word, parse, span)) {
            *stop = parse - str;//offset of the matched word
            parse += span;//advance past delimiter
            space = strspn ( parse, " \n\t");//leading whitespace
            parse += space;//advance past whitespace
            return parse;
        }
        parse += span;//advance past non whitespace for next word
    }
    return NULL;
}

/*
 * Print every piece of the sentence that lies between whole-word
 * occurrences of the separator, followed by whatever text remains.
 */
int main() {
    char sentence[] = "Jason, I am on the phone, could you please turn on the TV";
    char separator[5] = "on";
    char *piece = sentence;//start of the piece currently being printed
    size_t piece_len = 0;//filled in by strwordsep on a match

    for ( ;;) {
        char *rest = strwordsep ( piece, separator, &piece_len);
        if ( rest == NULL) {
            break;//no further separator word
        }
        printf ( "\nsplit strings:\n%.*s\n", (int)piece_len, piece);
        piece = rest;//continue after the separator
    }

    //text after the last separator, if any
    if ( *piece != '\0') {
        printf ( "\nsplit strings:\n%s\n", piece);
    }
    return 0;
}

Edit

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Find the first whitespace-delimited word in str that is exactly equal
 * to word.
 *
 * str   string to scan; it is not modified
 * word  word to look for (an empty word never matches)
 * stop  on a match, receives the offset from str to the start of the
 *       matched word; left untouched when there is no match
 *
 * Returns a pointer to the first non-whitespace character after the matched
 * word (possibly the terminating zero), or NULL when no word matches.
 * Whitespace is space, newline and tab.
 */
char *strwordsep ( char *str, char *word, size_t *stop) {
    char *parse = str;
    size_t space = 0;
    size_t span = 0;
    size_t length = strlen ( word);//constant for the whole scan

    while ( *parse){//parse not pointing to zero terminator
        space = strspn ( parse, " \n\t");//leading whitespace
        parse += space;//advance past whitespace
        span = strcspn ( parse, " \n\t");//not whitespace
        //strncmp alone compares only span characters, so a token that is a
        //prefix of word (e.g. "o" for "on") would match; require equal length
        if ( span && span == length && 0 == strncmp ( word, parse, span)) {
            *stop = parse - str;//offset of the matched word
            parse += span;//advance past delimiter
            space = strspn ( parse, " \n\t");//leading whitespace
            parse += space;//advance past whitespace
            return parse;
        }
        parse += span;//advance past non whitespace for next word
    }
    return NULL;
}

/*
 * Release a NULL-terminated array of allocated strings and the array itself.
 *
 * ppc  array whose last element is a sentinel NULL, or NULL for "nothing
 *      allocated yet" (in which case nothing is done)
 *
 * Always returns NULL so the caller can write ppc = freelines ( ppc);
 */
char **freelines ( char **ppc) {
    if ( ! ppc) {//nothing was ever allocated
        return NULL;
    }
    for ( int each = 0; ppc[each]; each++) {//loop until sentinel NULL
        free ( ppc[each]);//free memory
    }
    free ( ppc);//free pointers
    return NULL;
}

/*
 * Append a copy of the first length characters of add to the
 * NULL-terminated string array ppc.
 *
 * ppc     current array, or NULL when no line has been stored yet
 * lines   number of strings currently stored; incremented on success
 * add     source text; at most length characters are copied
 * length  number of characters to copy (not checked; must not be negative)
 *
 * Returns the (possibly moved) array on success. On allocation failure a
 * message is written to stderr, everything stored so far is released and
 * NULL is returned.
 */
char **addline ( char **ppc, int *lines, char *add, int length) {
    //room for one more string pointer plus the sentinel
    char **temp = realloc ( ppc, sizeof ( *temp) * ( *lines + 2));
    if ( ! temp) {
        fprintf ( stderr, "problem realloc\n");
        //a failed realloc leaves ppc as it was; on the very first call that
        //is NULL and there is nothing to release
        return ppc ? freelines ( ppc) : NULL;
    }
    ppc = temp;//assign reallocated pointer to original

    ppc[*lines] = malloc ( length + 1);//allocate memory to pointer
    if ( ! ppc[*lines]) {
        fprintf ( stderr, "problem malloc\n");
        //the NULL just stored acts as the sentinel for freelines
        return freelines ( ppc);//release memory
    }
    strncpy ( ppc[*lines], add, length);//copy length characters to pointer
    ppc[*lines][length] = 0;//strncpy does not terminate when add is longer

    ppc[*lines + 1] = NULL;//sentinel NULL
    *lines = *lines + 1;
    return ppc;
}

/*
 * Print every string of a NULL-terminated array to stdout, one per line,
 * prefixed with its index.
 *
 * ppc  array whose last element is a sentinel NULL; a NULL array is
 *      treated as empty and prints nothing
 */
void showlines ( char **ppc) {
    if ( ! ppc) {//no lines were stored
        return;
    }
    for ( int each = 0; ppc[each]; each++) {//loop until sentinel NULL
        printf ( "output[%d]= %s\n", each, ppc[each]);
    }
}

/*
 * Split the sentence at every whole-word occurrence of word, store a copy
 * of each piece, print the stored pieces and release them.
 * Exits with EXIT_FAILURE when a piece cannot be stored (addline has then
 * already released everything).
 */
int main() {
    char data[]="Jason, I am on the phone, could you please turn on the TV";
    char word[5]="on";
    char **output = NULL;//pointer to pointer to store sub-strings
    char *lead = data;
    char *trail = data;
    int lines = 0;
    size_t stop = 0;
    while ( ( trail = strwordsep ( lead, word, &stop))) {
        if ( ! ( output = addline ( output, &lines, lead, (int)stop))) {
            return EXIT_FAILURE;//allocation failed
        }
        lead = trail;//continue after the delimiter word
    }
    if ( *lead) {//text after the last delimiter word
        if ( ! ( output = addline ( output, &lines, lead, (int)strlen ( lead)))) {
            return EXIT_FAILURE;//allocation failed
        }
    }
    if ( output) {//still NULL when nothing was stored
        showlines ( output);
        output = freelines ( output);
    }
    return 0;
}

It's not entirely clear from your use of "non-overlapping" what your intent with data is, but I take it from your additional comments that you want to find "on" within data as a whole-word and not the "on" as part of "Jason" .

When attempting to locate "on" within data, you don't need strtok, strspn or strcspn; the correct tool for the job is strstr, which allows you to find the first occurrence of a substring within a string. Your only job is identifying the correct substring to search for.

Since in this case you want to find "on" as a whole word, why not search for " on" to locate "on" preceded by a space? (You could expand that to all whitespace as well, but for the purposes of your sentence we will use space-separated words and then expand the check to all whitespace to ensure that what follows "on" is whitespace.)

First, related to your initialization of data: unless you intend to append to your string within your code, there is no need to specify the magic number 50. Simply leave the [] empty and data will be sized appropriately to hold the string, e.g.

    /* empty [] lets the compiler size the array from the initializer */
    char data[]="Jason could you please turn on the TV",
        *p = data;    /* pointer to data */

Likewise, unless you plan on changing your delimiter, you can simply use a string-literal , eg

    const char *delim = " on";    /* leading space: "on" must start a word */

Then to locate " on" within data, all you need is a single call to strstr (p, delim) , and you can make the call within a conditional expression to determine whether it exists, eg

    if ((p = strstr (p, delim))) {          /* p -> the space before "on" */
        size_t len = strlen (delim);
        char *next = p + len;               /* first char after "on" */
        /* <ctype.h> functions need a value representable as unsigned char */
        unsigned char after = (unsigned char) *next;
        /* "on" may also be the last word, so accept end-of-string */
        if (after == '\0' || isspace (after) || ispunct (after)) {
            printf ("found: '%s' (now what?)\n", ++p);
        }
    }

If it is found, just declare a pointer (or use array indexing with p) to access the next character after " on". You can then test whether what follows " on" is whitespace, which confirms you have found your wanted substring. Since you know p points to the space before "on", you can simply increment the pointer p to point to "on" itself, as was done above within the printf statement. Now what you do with the remainder of the string is up to you. You have data pointing to the beginning of the string, p pointing to "on", and next pointing to the whitespace following "on", so you can trivially copy "on" or nul-terminate at next -- whatever it is you need to do.

Putting it all together, you would have:

#include <stdio.h>
#include <string.h>
#include <ctype.h>

/*
 * Locate the first " on" in data and, when it is followed by whitespace,
 * punctuation or the end of the string, print the rest of the string
 * starting at "on".
 */
int main (void) {

    char data[]="Jason could you please turn on the TV",
        *p = data;
    const char *delim = " on";

    if ((p = strstr (p, delim))) {          /* p -> the space before "on" */
        size_t len = strlen (delim);
        char *next = p + len;               /* first char after "on" */
        /* <ctype.h> functions need a value representable as unsigned char */
        unsigned char after = (unsigned char) *next;
        /* "on" may also be the last word, so accept end-of-string */
        if (after == '\0' || isspace (after) || ispunct (after)) {
            printf ("found: '%s' (now what?)\n", ++p);
        }
    }

    return 0;
}

Example Use/Output

$ ./bin/strstr_on
found: 'on the TV' (now what?)

Look things over and let me know if you have any further questions.

Finding Multiple "on" In String

As explained in the comments below, if you have multiple "on" located in your input, all you need to do is turn the above if statement into a loop and then set p = next; at the end of the loop. For example, to find all substrings beginning with "on", the only changes needed are:

    char data[]="Jason could you please turn on the TV on the desk",
    ...
    while ((p = strstr (p, delim))) {       /* p -> the space before "on" */
        size_t len = strlen (delim);
        char *next = p + len;               /* first char after "on" */
        /* <ctype.h> functions need a value representable as unsigned char */
        unsigned char after = (unsigned char) *next;
        /* "on" may also be the last word, so accept end-of-string */
        if (after == '\0' || isspace (after) || ispunct (after)) {
            printf ("found: '%s' (now what?)\n", ++p);
        }
        p = next;                           /* resume the search after this hit */
    }

Use/Output Finding All "on"

$ ./bin/strstr_on
found: 'on the TV on the desk' (now what?)
found: 'on the desk' (now what?)

Let me know if you have any more questions.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM