In the code below there are two instances of the substring "on" in the string data. But is it possible to apply strtok only to the substring "on" which is non-overlapping (i.e. is not part of another word)? If so, can someone please tell me how, and what I am doing wrong in the code below?
#include<stdio.h>
#include<string.h>
#include<ctype.h>
int main()
{
char data[50]="Jason could you please turn on the TV";
char delimiter[5]="on";
char *ptr,*pointer,*pa,*p,*pb[10];
int i=0,j=0,k=0,count=0;
p=data;
pointer=data;
while((*pointer!='\0')&&(pointer=strstr(pointer,delimiter)))
{
pa=pointer+strlen(delimiter);
ptr=(--pointer);
while((isspace(*ptr))&&(isspace(*pa)))
{
pb[count]=strtok(ptr,delimiter);
printf("%s\n",pb[count]);
count++;
break;
}
pointer++;
(*pointer)++;
}
}
strspn
and strcspn
can be used to parse a string for a matching word.
strtok
will split the string at each occurrence of the individual characters in the delimiter. This is not well suited for what you appear to want to do.
#include <stdio.h>
#include <string.h>
/*
 * Walk data one whitespace-separated word at a time, print every word and
 * flag the words that are exactly equal to delimiter.
 */
int main() {
    char data[50]="Jason could you please turn on the TV";
    char delimiter[5]="on";
    char *parse = data;
    size_t wanted = strlen ( delimiter);//length a word must have to be a match
    size_t space = 0;
    size_t span = 0;

    while ( *parse){//parse not pointing to zero terminator
        space = strspn ( parse, " \n\t");//leading whitespace
        parse += space;//advance past whitespace
        span = strcspn ( parse, " \n\t");//not whitespace
        if ( span) {
            printf("word is: %.*s\n", (int)span, parse);//prints span number of characters
            // Compare lengths first: strncmp limited to span characters alone
            // would accept a shorter word such as "o" as a match for "on",
            // and a zero-length span (trailing whitespace) would always match.
            if ( span == wanted && 0 == strncmp ( delimiter, parse, span)) {
                printf ( "\tword matches delimiter: %s\n", delimiter);//found match
            }
        }
        parse += span;//advance past non whitespace for next word
    }
    return 0;
}
EDIT:
#include <stdio.h>
#include <string.h>
/*
 * Split data in two at the first whitespace-separated word that is exactly
 * equal to delimiter: the first part is terminated in place where the
 * delimiter word starts, the second part begins at the next word after it.
 */
int main() {
    char data[50]="Jason could you please turn on the TV";
    char delimiter[5]="on";
    char *parse = data;
    size_t wanted = strlen ( delimiter);//length a word must have to be a match
    size_t space = 0;
    size_t span = 0;

    while ( *parse){//parse not pointing to zero terminator
        space = strspn ( parse, " \n\t");//leading whitespace
        parse += space;//advance past whitespace
        span = strcspn ( parse, " \n\t");//not whitespace
        if ( span) {
            printf("word is: %.*s\n", (int)span, parse);//prints span number of characters
            // Compare lengths first: strncmp limited to span characters alone
            // would accept a shorter word such as "o" as a match for "on".
            if ( span == wanted && 0 == strncmp ( delimiter, parse, span)) {
                printf ( "\tword matches delimiter: %s\n", delimiter);//found match
                *parse = 0;//terminate the first part where the delimiter word starts
                parse += span;//advance past the delimiter word
                space = strspn ( parse, " \n\t");//whitespace after the delimiter
                parse += space;//second part starts at the next word
                break;
            }
        }
        parse += span;//advance past non whitespace for next word
    }
    // without a match parse ends on the terminator, so the second string is empty
    printf ( "\n\nsplit strings:\n%s\n%s\n", data, parse);
    return 0;
}
The basis could be wrapped in a function. This would divide the original string into as many sub-strings as required by the delimiting word. None of the sub-strings are stored, but that can be achieved with a small modification.
#include <stdio.h>
#include <string.h>
/*
 * Locate the first whitespace-separated token in str that is exactly word.
 *
 * str   string to scan; it is not modified
 * word  the separating word to look for
 * stop  on a match, receives the offset from str at which the matching
 *       token starts (i.e. the length of the text before the separator);
 *       left untouched when there is no match
 *
 * Returns a pointer to the first non-whitespace character after the
 * matching token (the terminator if nothing follows), or NULL when no
 * token equals word.
 */
char *strwordsep ( char *str, char *word, size_t *stop) {
    const char *blank = " \n\t";
    size_t wanted = strlen ( word);//length a token must have to be a match
    char *parse = str;

    while ( *parse){//parse not pointing to zero terminator
        parse += strspn ( parse, blank);//advance past leading whitespace
        size_t span = strcspn ( parse, blank);//length of the token
        // Compare lengths first: strncmp limited to span characters alone
        // would accept a shorter token such as "o" as a match for "on".
        if ( span && span == wanted && 0 == strncmp ( word, parse, span)) {
            *stop = (size_t)( parse - str);
            parse += span;//advance past delimiter
            parse += strspn ( parse, blank);//advance past whitespace
            return parse;
        }
        parse += span;//advance past non whitespace for next word
    }
    return NULL;
}
/*
 * Print every piece of the sentence that lies between stand-alone
 * occurrences of the separator word, then the remaining tail if any.
 */
int main() {
    char sentence[]="Jason, I am on the phone, could you please turn on the TV";
    char separator[5]="on";
    char *piece = sentence;
    size_t cut = 0;

    for ( ;;) {
        char *rest = strwordsep ( piece, separator, &cut);
        if ( rest == NULL) {
            break;//no further separator in the remaining text
        }
        //cut is the length of the text in front of the separator
        printf ( "\nsplit strings:\n%.*s\n", (int)cut, piece);
        piece = rest;
    }
    if ( *piece != '\0') {
        printf ( "\nsplit strings:\n%s\n", piece);
    }
    return 0;
}
Edit
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Locate the first whitespace-separated token in str that is exactly word.
 *
 * str   string to scan; it is not modified
 * word  the separating word to look for
 * stop  on a match, receives the offset from str at which the matching
 *       token starts (i.e. the length of the text before the separator);
 *       left untouched when there is no match
 *
 * Returns a pointer to the first non-whitespace character after the
 * matching token (the terminator if nothing follows), or NULL when no
 * token equals word.
 */
char *strwordsep ( char *str, char *word, size_t *stop) {
    const char *blank = " \n\t";
    size_t wanted = strlen ( word);//length a token must have to be a match
    char *parse = str;

    while ( *parse){//parse not pointing to zero terminator
        parse += strspn ( parse, blank);//advance past leading whitespace
        size_t span = strcspn ( parse, blank);//length of the token
        // Compare lengths first: strncmp limited to span characters alone
        // would accept a shorter token such as "o" as a match for "on".
        if ( span && span == wanted && 0 == strncmp ( word, parse, span)) {
            *stop = (size_t)( parse - str);
            parse += span;//advance past delimiter
            parse += strspn ( parse, blank);//advance past whitespace
            return parse;
        }
        parse += span;//advance past non whitespace for next word
    }
    return NULL;
}
/*
 * Release a NULL-terminated array of heap strings and the array itself.
 *
 * ppc  array whose last element is a NULL sentinel, or NULL (nothing to do)
 *
 * Always returns NULL so the caller can write `list = freelines ( list);`.
 */
char **freelines ( char **ppc) {
    if ( ! ppc) {
        return NULL;//nothing was ever allocated, e.g. the first realloc failed
    }
    for ( int each = 0; ppc[each]; each++) {//loop until sentinel NULL
        free ( ppc[each]);//free memory
    }
    free ( ppc);//free pointers
    return NULL;
}
/*
 * Append a copy of the first length characters of add to the
 * NULL-terminated string array ppc.
 *
 * ppc     current array, or NULL for an empty one
 * lines   number of strings stored in ppc; incremented on success
 * add     source text; at most length characters are copied
 * length  number of characters to copy; must not be negative
 *
 * Returns the (possibly moved) array on success. On any failure a message
 * is written to stderr, everything stored so far is released and NULL is
 * returned, so the caller must not use the old pointer afterwards.
 */
char **addline ( char **ppc, int *lines, char *add, int length) {
    char **temp = NULL;

    if ( length < 0) {
        // a negative length would turn into a huge size_t in malloc/strncpy
        fprintf ( stderr, "problem length\n");
        return ppc ? freelines ( ppc) : NULL;//release memory
    }
    temp = realloc ( ppc, sizeof ( *temp) * ( *lines + 2));//add pointer
    if ( ! temp) {
        fprintf ( stderr, "problem realloc\n");
        // a failed realloc leaves ppc untouched; on the first call ppc is
        // still NULL, so only walk the array when it exists
        return ppc ? freelines ( ppc) : NULL;//release memory
    }
    ppc = temp;//assign reallocated pointer to original
    ppc[*lines] = malloc ( (size_t)length + 1);//allocate memory to pointer
    if ( ! ppc[*lines]) {
        fprintf ( stderr, "problem malloc\n");
        // the failed slot holds NULL and serves as the sentinel for freelines
        return freelines ( ppc);//release memory
    }
    strncpy ( ppc[*lines], add, (size_t)length);//copy length characters to pointer
    ppc[*lines][length] = 0;//strncpy does not terminate when add is longer
    ppc[*lines + 1] = NULL;//sentinel NULL
    *lines = *lines + 1;
    return ppc;
}
/*
 * Print every string of a NULL-terminated array, one per line, prefixed
 * with its index.
 *
 * ppc  array whose last element is a NULL sentinel, or NULL (prints nothing)
 */
void showlines ( char **ppc) {
    if ( ! ppc) {
        return;//no array was ever built, e.g. the input was empty
    }
    for ( int each = 0; ppc[each]; each++) {//loop until sentinel NULL
        printf ( "output[%d]= %s\n", each, ppc[each]);
    }
}
/*
 * Split data at every stand-alone occurrence of word, store a copy of each
 * piece in a dynamically grown array, print the pieces and release them.
 * Exits with EXIT_FAILURE when a piece cannot be stored.
 */
int main() {
    char data[]="Jason, I am on the phone, could you please turn on the TV";
    char word[5]="on";
    char **output = NULL;//pointer to pointer to store sub-strings
    char *lead = data;
    char *trail = data;
    int lines = 0;
    size_t stop = 0;

    while ( ( trail = strwordsep ( lead, word, &stop))) {
        //stop is the length of the text in front of the separator
        if ( ! ( output = addline ( output, &lines, lead, (int)stop))) {
            return EXIT_FAILURE;//addline already reported and released everything
        }
        lead = trail;
    }
    if ( *lead) {//text left after the last separator
        if ( ! ( output = addline ( output, &lines, lead, (int)strlen ( lead)))) {
            return EXIT_FAILURE;
        }
    }
    if ( output) {//stays NULL when data held no text at all
        showlines ( output);
        output = freelines ( output);
    }
    return 0;
}
It's not entirely clear from your use of "non-overlapping" what your intent with data
is, but I take it from your additional comments that you want to find "on"
within data
as a whole-word and not the "on"
as part of "Jason"
.
When attempting to locate "on"
within data
, you don't need strtok, strspn
or strcspn
, the correct tool for the job is strstr
which allows you to find the first occurrence of a substring within a string. Your only job is identifying the correct substring to search for.
Since in this case you want to find "on"
as a whole-word, why not search for " on"
to locate "on"
preceded by a space. (you could expand that to all whitespace as well, but for purpose of your sentence we will use space separated words and then expand the check for all whitespace to ensure what follows "on"
is whitespace).
First, related to your initialization of data
, unless you intend to append to your string within your code, there is no need to specify the magic-number 50
, simply leave the []
empty and data
will be sized appropriately to hold the string, e.g.
/* empty [] lets the compiler size the array from the initializer */
char data[]="Jason could you please turn on the TV",
*p = data; /* p starts at the first character of data */
Likewise, unless you plan on changing your delimiter, you can simply use a string-literal , eg
const char *delim = " on"; /* the leading space keeps the "on" inside "Jason" from matching */
Then to locate " on"
within data, all you need is a single call to strstr (p, delim)
, and you can make the call within a conditional expression to determine whether it exists, eg
/* strstr returns NULL when delim is absent, so the body runs only on a hit;
 * p then points at the space in front of "on" */
if ((p = strstr (p, delim))) {
size_t len = strlen (delim);
char *next = p + len; /* first character after " on" */
if (isspace (*next) || ispunct (*next)) { /* whole word only if whitespace or punctuation follows */
printf ("found: '%s' (now what?)\n", ++p); /* ++p steps over the leading space */
}
}
If it is found, just declare a pointer (or use array indexing with p
) to access the next character after " on"
. You can then test whether what follows " on"
is whitespace which confirms you have found your wanted substring. Since you know p
points to the space
before "on"
, you can simply increment the pointer p
to point to "on"
itself as was done above within the printf
statement. Now what you do with the remainder of the string is up to you. You have p
pointing to the beginning of the found substring, and next
pointing to the whitespace following "on"
, so you can trivially copy "on"
or nul-terminate at next
-- whatever it is you need to do.
Putting it all together, you would have:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Find the first " on" in data and report it when it is a whole word,
 * i.e. when whitespace, punctuation or the end of the string follows.
 */
int main (void) {
    char data[]="Jason could you please turn on the TV",
        *p = data;
    const char *delim = " on";  /* leading space rejects the "on" in "Jason" */

    if ((p = strstr (p, delim))) {
        size_t len = strlen (delim);
        char *next = p + len;   /* first character after " on" */
        /* <ctype.h> functions need a value representable as unsigned char;
         * a plain char may be negative */
        unsigned char after = (unsigned char)*next;
        /* '\0' covers "on" being the last word of the string */
        if (after == '\0' || isspace (after) || ispunct (after)) {
            printf ("found: '%s' (now what?)\n", ++p);  /* ++p skips the space */
        }
    }
    return 0;
}
Example Use/Output
$ ./bin/strstr_on
found: 'on the TV' (now what?)
Look things over and let me know if you have any further questions.
Finding Multiple "on"
In String
As explained in the comments below, if you have multiple "on"
located in your input, all you need to do is fashion the above if
statement into a loop and then set p = next;
at the end of the loop. For example, the only changes needed to find all substrings beginning with "on"
are:
char data[]="Jason could you please turn on the TV on the desk",
...
/* repeat the search until strstr finds no further delim and returns NULL */
while ((p = strstr (p, delim))) {
size_t len = strlen (delim);
char *next = p + len; /* first character after " on" */
if (isspace (*next) || ispunct (*next)) { /* whole word only if whitespace or punctuation follows */
printf ("found: '%s' (now what?)\n", ++p); /* ++p steps over the leading space */
}
p = next; /* resume the search right after this hit, whether or not it was a whole word */
}
Use/Output Finding All "on"
$ ./bin/strstr_on
found: 'on the TV on the desk' (now what?)
found: 'on the desk' (now what?)
Let me know if you have any more questions.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.