[英]Is it possible to use strtok function on only non-overlapping strings?
在下面的代碼中，字符串 data 裡出現了兩次子字符串 "on"。是否可以只對不重疊的子字符串 "on"（即不屬於其他單詞一部分的那個）使用 strtok？如果可以，請告訴我該怎麼做，以及下面的代碼哪裡寫錯了。
#include<stdio.h>
#include<string.h>
#include<ctype.h>
// Asker's attempt: look for each occurrence of delimiter in data that has
// whitespace on both sides and print a token obtained from strtok for it.
int main()
{
char data[50]="Jason could you please turn on the TV";
char delimiter[5]="on";
// p is assigned below but never read; i, j and k are never used.
char *ptr,*pointer,*pa,*p,*pb[10];
int i=0,j=0,k=0,count=0;
p=data;
pointer=data;
// strstr reports every "on", including the one inside "Jason".
while((*pointer!='\0')&&(pointer=strstr(pointer,delimiter)))
{
// pa: first character after the match.
pa=pointer+strlen(delimiter);
// Steps pointer back to the character before the match.
// NOTE(review): this would leave the array if the match were at data[0].
ptr=(--pointer);
// Behaves like an `if` because the body always breaks: entered only when
// the characters before and after the match are both whitespace.
while((isspace(*ptr))&&(isspace(*pa)))
{
// strtok treats every character of delimiter ('o' and 'n') as a separator
// and overwrites the separator it stops at with '\0' inside data.
pb[count]=strtok(ptr,delimiter);
printf("%s\n",pb[count]);
count++;
break;
}
// Undo the decrement so pointer is back on the start of the match.
pointer++;
// NOTE(review): this increments the character stored at pointer (modifying
// data), not the pointer itself.
(*pointer)++;
}
}
strspn 和 strcspn 可以用來逐個單詞地解析字符串，再把每個單詞與目標單詞比較。
只要遇到分隔符字符串中的任意一個字符，strtok 就會分割字符串，因此它不太適合您想做的事情。
#include <stdio.h>
#include <string.h>
/*
 * Walk through data one whitespace-separated word at a time, print each
 * word, and report the words that are exactly equal to delimiter.
 */
int main() {
    char data[50]="Jason could you please turn on the TV";
    char delimiter[5]="on";
    char *parse = data;
    size_t wanted = strlen ( delimiter);//length a word must have to match

    while ( *parse) {//parse not pointing to zero terminator
        parse += strspn ( parse, " \n\t");//advance past leading whitespace
        size_t span = strcspn ( parse, " \n\t");//length of the word at parse
        if ( span) {
            printf("word is: %.*s\n", (int)span, parse);//prints span number of characters
            //compare only real words of the right length: strncmp with a
            //count of 0 always returns 0, and with a short count it would
            //accept a word that is merely a prefix of delimiter
            if ( span == wanted && 0 == strncmp ( delimiter, parse, span)) {
                printf ( "\tword matches delimiter: %s\n", delimiter);//found match
            }
        }
        parse += span;//advance past non whitespace for next word
    }
    return 0;
}
編輯:
#include <stdio.h>
#include <string.h>
/*
 * Split data in two at the first whole word equal to delimiter: the text
 * before the word stays in data (terminated in place) and parse is left
 * pointing at the text after it. Both halves are printed at the end.
 */
int main() {
    char data[50]="Jason could you please turn on the TV";
    char delimiter[5]="on";
    char *parse = data;
    size_t wanted = strlen ( delimiter);//length a word must have to match

    while ( *parse) {//parse not pointing to zero terminator
        parse += strspn ( parse, " \n\t");//advance past leading whitespace
        size_t span = strcspn ( parse, " \n\t");//length of the word at parse
        if ( span) {
            printf("word is: %.*s\n", (int)span, parse);//prints span number of characters
            //length check first: strncmp limited to span characters would
            //also accept a word that is only a prefix of delimiter
            if ( span == wanted && 0 == strncmp ( delimiter, parse, span)) {
                printf ( "\tword matches delimiter: %s\n", delimiter);//found match
                *parse = 0;//terminate the leading part where the word began
                parse += span;//advance past the matched word
                parse += strspn ( parse, " \n\t");//and past the whitespace after it
                break;
            }
        }
        parse += span;//advance past non whitespace for next word
    }
    //when nothing matched, parse points at the terminator and prints as empty
    printf ( "\n\nsplit strings:\n%s\n%s\n", data, parse);
    return 0;
}
基本邏輯可以封裝成一個函數。它會以分隔單詞為界，把原始字符串拆分成所需數量的子字符串。這裡沒有保存任何子字符串，但可以修改代碼來保存它們。
#include <stdio.h>
#include <string.h>
/*
 * Find the first whitespace-delimited word in str that is exactly equal to
 * word. Words are separated by space, newline or tab. str is not modified.
 *
 * str   string to scan
 * word  word to look for
 * stop  on a match, receives the offset from str to the start of the
 *       matched word; left untouched when there is no match
 *
 * Returns a pointer to the first non-whitespace character after the matched
 * word (possibly the terminator), or NULL when word does not occur.
 */
char *strwordsep ( char *str, char *word, size_t *stop) {
    static const char blanks[] = " \n\t";
    size_t wanted = strlen ( word);
    char *parse = str;

    while ( *parse) {//parse not pointing to zero terminator
        parse += strspn ( parse, blanks);//advance past leading whitespace
        size_t span = strcspn ( parse, blanks);//length of the word at parse
        //the length must match too: strncmp limited to span characters
        //would accept a word that is only a prefix of the search word
        if ( span && span == wanted && 0 == strncmp ( word, parse, span)) {
            *stop = (size_t)( parse - str);
            parse += span;//advance past the matched word
            parse += strspn ( parse, blanks);//and past the whitespace after it
            return parse;
        }
        parse += span;//advance past non whitespace for next word
    }
    return NULL;
}
/*
 * Print every piece of data that lies between whole-word occurrences of
 * word, followed by whatever text remains after the last occurrence.
 */
int main() {
    char data[]="Jason, I am on the phone, could you please turn on the TV";
    char word[5]="on";
    char *segment = data;//start of the piece currently being printed
    size_t cut = 0;//offset of the separator word within segment

    for ( ;;) {
        char *after = strwordsep ( segment, word, &cut);
        if ( after == NULL) {
            break;//no further separator word
        }
        printf ( "\nsplit strings:\n%.*s\n", (int)cut, segment);
        segment = after;
    }
    if ( *segment != '\0') {//text left after the last separator
        printf ( "\nsplit strings:\n%s\n", segment);
    }
    return 0;
}
編輯
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Find the first whitespace-delimited word in str that is exactly equal to
 * word. Words are separated by space, newline or tab. str is not modified.
 *
 * str   string to scan
 * word  word to look for
 * stop  on a match, receives the offset from str to the start of the
 *       matched word; left untouched when there is no match
 *
 * Returns a pointer to the first non-whitespace character after the matched
 * word (possibly the terminator), or NULL when word does not occur.
 */
char *strwordsep ( char *str, char *word, size_t *stop) {
    static const char blanks[] = " \n\t";
    size_t wanted = strlen ( word);
    char *parse = str;

    while ( *parse) {//parse not pointing to zero terminator
        parse += strspn ( parse, blanks);//advance past leading whitespace
        size_t span = strcspn ( parse, blanks);//length of the word at parse
        //the length must match too: strncmp limited to span characters
        //would accept a word that is only a prefix of the search word
        if ( span && span == wanted && 0 == strncmp ( word, parse, span)) {
            *stop = (size_t)( parse - str);
            parse += span;//advance past the matched word
            parse += strspn ( parse, blanks);//and past the whitespace after it
            return parse;
        }
        parse += span;//advance past non whitespace for next word
    }
    return NULL;
}
/*
 * Release a NULL-terminated array of allocated strings: every string up to
 * the sentinel NULL is freed, then the array itself.
 *
 * ppc  array to release; may be NULL, in which case nothing is done
 *
 * Always returns NULL so the caller can reset its pointer in one statement.
 */
char **freelines ( char **ppc) {
    if ( ppc) {//nothing to release when no array was ever allocated
        for ( char **line = ppc; *line; line++) {//loop until sentinel NULL
            free ( *line);//free memory
        }
        free ( ppc);//free pointers
    }
    return NULL;
}
/*
 * Append a copy of the first length characters of add to the
 * NULL-terminated string array ppc, which currently holds *lines entries.
 *
 * On success the array is grown by one slot, the sentinel NULL is rewritten
 * after the new entry, *lines is incremented and the (possibly moved) array
 * is returned. If either allocation fails a message is written to stderr,
 * the array is handed to freelines and that call's result is returned.
 */
char **addline ( char **ppc, int *lines, char *add, int length) {
    //room for the new entry plus the sentinel
    char **grown = realloc ( ppc, sizeof ( *grown) * ( *lines + 2));
    if ( grown == NULL) {
        fprintf ( stderr, "problem realloc\n");
        return freelines ( ppc);//release memory
    }
    ppc = grown;

    char *copy = malloc ( length + 1);
    ppc[*lines] = copy;//stored before the check: a NULL here ends the array for freelines
    if ( copy == NULL) {
        fprintf ( stderr, "problem malloc\n");
        return freelines ( ppc);//release memory
    }

    strncpy ( copy, add, length);//copy length characters
    copy[length] = 0;//strncpy does not terminate when add is longer
    ppc[*lines + 1] = NULL;//sentinel NULL
    *lines += 1;
    return ppc;
}
/*
 * Print each string of a NULL-terminated array on its own line, prefixed
 * with its index, as "output[i]= text".
 *
 * ppc  array to print; may be NULL, in which case nothing is printed
 */
void showlines ( char **ppc) {
    if ( ppc == NULL) {//no array was built, e.g. the input had no words
        return;
    }
    for ( int each = 0; ppc[each]; each++) {//loop until sentinel NULL
        printf ( "output[%d]= %s\n", each, ppc[each]);
    }
}
/*
 * Split data at every whole-word occurrence of word, store each piece as a
 * separate allocated string, print the pieces and release them.
 * Exits with EXIT_FAILURE when a piece cannot be stored.
 */
int main() {
    char data[]="Jason, I am on the phone, could you please turn on the TV";
    char word[5]="on";
    char **output = NULL;//pointer to pointer to store sub-strings
    char *lead = data;//start of the piece currently being collected
    char *trail = NULL;//start of the text after the separator word
    int lines = 0;
    size_t stop = 0;//offset of the separator word within lead

    while ( ( trail = strwordsep ( lead, word, &stop))) {
        if ( ! ( output = addline ( output, &lines, lead, (int)stop))) {
            return EXIT_FAILURE;//addline already reported and released everything
        }
        lead = trail;
    }
    if ( *lead) {//text left after the last separator
        if ( ! ( output = addline ( output, &lines, lead, (int)strlen ( lead)))) {
            return EXIT_FAILURE;
        }
    }
    if ( output) {//stays NULL when data held no text at all
        showlines ( output);
        output = freelines ( output);
    }
    return 0;
}
從您對“非重疊”一詞的用法，看不太出您究竟想對 data 做什麼；不過根據您的其他評論，我推斷您想在 data 中找到作為完整單詞的 "on"，而不是作為 "Jason" 一部分的 "on"。
要在 data 中定位 "on"，不需要 strtok、strspn 或 strcspn；適合這項工作的工具是 strstr，它可以找到子字符串在字符串中的首次出現。您唯一要做的就是確定要搜索的正確子字符串。
由於在這種情況下您要匹配的是作為完整單詞的 "on"，何不直接搜索 " on"，也就是前面帶一個空格的 "on"？（您也可以把它擴展到所有空白字符，但就句子而言，這裡只處理以空格分隔的單詞，之後再用涵蓋所有空白字符的檢查來確認 "on" 後面是空白。）
首先,與data
的初始化相關,除非您打算在代碼中附加到字符串,否則無需指定幻數 50
,只需將[]
留空, data
大小將適當地容納該字符串,例如
char data[]="Jason could you please turn on the TV",
*p = data; /* pointer to data */
同樣,除非計划更改定界符,否則可以簡單地使用string-literal ,例如
const char *delim = " on";
然后,要在數據中定位" on"
,您只需對strstr (p, delim)
進行一次調用,即可在條件表達式中進行調用以確定其是否存在,例如
/* Locate " on" in p and accept it only when it ends a word. */
if ((p = strstr (p, delim))) {
    char *next = p + strlen (delim);            /* first character after " on" */
    /* <ctype.h> functions require a value representable as unsigned char */
    unsigned char after = (unsigned char)*next;
    /* end of string also ends the word, as do whitespace and punctuation */
    if (after == '\0' || isspace (after) || ispunct (after)) {
        printf ("found: '%s' (now what?)\n", ++p);  /* step past the space onto "on" */
    }
}
如果找到它,只需聲明一個指針(或使用帶有p
數組索引)來訪問" on"
之后的下一個字符。 然后,您可以測試" on"
后面是否是空格,以確認您已找到所需的子字符串。 由於您知道p
指向"on"
之前的space
,因此可以像上面在printf
語句中所做的那樣簡單地將指針p
遞增為指向"on"
本身。 現在，如何處理字符串的其余部分由您決定。 此時 p 指向 "on" 的開頭，而 next 指向 "on" 之後的空白字符，因此您可以輕鬆地複製 "on"，或者在 next 處寫入 NUL 終止符，視您的需要而定。
綜上所述,您將擁有:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/*
 * Find the first " on" in data that ends a word (followed by whitespace,
 * punctuation or the end of the string) and print the text starting at "on".
 */
int main (void) {
    char data[]="Jason could you please turn on the TV",
        *p = data;                              /* pointer to data */
    const char *delim = " on";

    if ((p = strstr (p, delim))) {
        char *next = p + strlen (delim);        /* first character after " on" */
        /* <ctype.h> functions require a value representable as unsigned char */
        unsigned char after = (unsigned char)*next;
        /* end of string also ends the word, as do whitespace and punctuation */
        if (after == '\0' || isspace (after) || ispunct (after)) {
            printf ("found: '%s' (now what?)\n", ++p);  /* step past the space onto "on" */
        }
    }
    return 0;
}
使用/輸出示例
$ ./bin/strstr_on
found: 'on the TV' (now what?)
仔細檢查一下,如果您還有其他問題,請與我聯系。
在字符串中查找多個"on"
如以下注釋中所述,如果輸入中有多個"on"
,則您需要做的就是將上述if
語句放入一個循環中,然后設置p = next;
在循環的末尾。 例如,查找以"on"
開頭的所有子字符串所需的唯一更改就是:
char data[]="Jason could you please turn on the TV on the desk",
...
/* Visit every " on" in p and report those that end a word. */
while ((p = strstr (p, delim))) {
    char *next = p + strlen (delim);            /* first character after " on" */
    /* <ctype.h> functions require a value representable as unsigned char */
    unsigned char after = (unsigned char)*next;
    /* end of string also ends the word, as do whitespace and punctuation */
    if (after == '\0' || isspace (after) || ispunct (after)) {
        printf ("found: '%s' (now what?)\n", p + 1);    /* skip the leading space */
    }
    p = next;                                   /* resume the search after this match */
}
使用/輸出查找所有"on"
$ ./bin/strstr_on
found: 'on the TV on the desk' (now what?)
found: 'on the desk' (now what?)
如果您還有其他問題,請告訴我。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.