简体   繁体   中英

I have a string str that contains two arrays (there can be more), which I would like to convert to integer arrays in C.

As an Example, Input:

c[] = "[1,2,3][5,7,8]"

Output:

a = [1,2,3] //of type int a[]
b = [5,7,8] //of type int b[]

I have tried using strtok to remove "]". But, when I use strtok next time, I am not able to use it. If I try to print the output, I get

[1,2,3
[1
2
3

instead of

[1,2,3
[1
2
3
[5,7,8
[5
7
8

Code that I have so far

char c[] = "[1,2,3][5,7,8]"; 

/* Outer pass: split c at every ']' and print each piece. */
char *token = strtok(c, "]");
for (token; token != NULL ; token = strtok(NULL, "]")){
    puts(token);
    /* Inner pass: split at ','. Passing c (a non-NULL first argument) makes
     * strtok start a new scan, which replaces the scan the outer loop is
     * relying on -- this is why the second bracket group is never printed. */
    char *comma = strtok(c, ",");
    for (comma; comma != NULL; comma = strtok(NULL, ",")){
        puts(comma);
    }
}

Your problem is, that strtok has a memory. First time that you pass in a string, it is remembered and then used again and again as long as you pass in NULL as first parameter.

However, within your loop, you call strtok again with a parameter. So this new string (which is the first token only) is placed in strtok's memory, and after it is processed completely in the inner loop, there is nothing left to tokenize in the outer loop.

Have a look at this thread; it explains in more detail how strtok works.

However, you are lucky: strtok is manipulating the string you first passed in place (this is why you have to pass the string to be tokenized as char* , but the delimiters can be a const char* ). So you can do this:

char c[] = "[1,2,3][5,7,8]";

char* next = c;
char* token;
while((token = strtok(next, "]")))
{
    puts(token);
    next += strlen(token) + 1; // move next behind the token
    token = strtok(token, ",");
    do
    {
        puts(token);
    }
    while((token = strtok(NULL, ",")));
}

If you are wondering about the extra parentheses, these are to prevent a warning in the compiler ("possible assignment instead of comparison").

This solution has two nested loops of strtok_s , because strtok is not re-entrant. This is MSVC, some systems implement the similar strtok_r .

I have created output in accordance with the top of your question, this can be modified to suit other output, it was not very clear. In this case, it was not really necessary to have two nested loops, but your subsequent examples confuse the issue by breaking up the comma'ed input.

#include <stdio.h>
#include <string.h>

/* Split the sample string into its [bracket] groups and print each group as
 * "<letter> = [v1,v2,...] //of type int <letter>[]", one line per group.
 * Two independent strtok_s contexts are used so that the inner (comma) scan
 * does not disturb the outer (bracket) scan. */
int main(void) {
    char input[] = "[1,2,3][5,7,8]";
    char *group_ctx = NULL;                     /* state of the bracket scan */
    char label = 'a';                           /* name printed for the current group */
    char *group;

    for (group = strtok_s(input, "[]", &group_ctx);
         group != NULL;
         group = strtok_s(NULL, "[]", &group_ctx)) {
        char *item_ctx = NULL;                  /* state of the comma scan */
        char *item;
        int need_sep = 0;                       /* becomes 1 once a value has been printed */

        printf("%c = [", label);
        for (item = strtok_s(group, ",", &item_ctx);
             item != NULL;
             item = strtok_s(NULL, ",", &item_ctx)) {
            if (need_sep) {
                printf(",");
            }
            printf("%s", item);
            need_sep = 1;
        }
        printf("] //of type int %c[]\n", label);
        label++;
    }
    return 0;
}

The simpler program where you don't need to examine within the [brackets] is

#include <stdio.h>
#include <string.h>

/* Print every [bracket] group of the sample string on its own line, labelled
 * 'a', 'b', ... in order of appearance. The text between the brackets is
 * printed as-is, without being split at the commas. */
int main(void) {
    char input[] = "[1,2,3][5,7,8]";
    char label = 'a';
    char *group;

    for (group = strtok(input, "[]"); group != NULL; group = strtok(NULL, "[]")) {
        printf("%c = [%s] //of type int %c[]\n", label, group, label);
        label++;
    }
    return 0;
}

In both cases the output is:

a = [1,2,3] //of type int a[]
b = [5,7,8] //of type int b[]

EDIT altered the second example to give output as per OP's recent comment above.

#include <stdio.h>
#include <string.h>

/* Print every [bracket] group of the sample string as a C array definition,
 * e.g. "int a[] = { 1,2,3 };", naming the arrays 'a', 'b', ... in order. */
int main(void) {
    char c[] = "[1,2,3][5,7,8]"; 
    char *tok = NULL;
    char identifier = 'a';

    tok = strtok(c, "[]");                      /* brackets act as the only delimiters */
    while(tok != NULL) {
        /* The format has exactly two conversions (%c, %s), so exactly two
         * arguments are passed. */
        printf("int %c[] = { %s };\n", identifier, tok);
        identifier++;
        tok = strtok(NULL, "[]");
    }
    return 0;
}

Program output:

int a[] = { 1,2,3 };
int b[] = { 5,7,8 };

If you are converting a string of character digits to an array of integer values, one character per value (or allowing a - before any character digit to indicate a negative value for your array), you may be better off writing a simple function to step through the string and perform your conversions manually.

An example using array indexing of the string could be written as follows. You could easily change the array index notation to pointer notation, which is more intuitive to some.

#include <stdio.h>
#include <string.h>

size_t str2arr (char *d, size_t max, char *s, size_t *ofs);

/* Convert the first two bracketed segments of the input into the arrays
 * a and b with str2arr and print their elements. The input is argv[1] when
 * an argument is given, otherwise the built-in sample string. */
int main (int argc, char **argv) {

    char c[] = "[1,2,3][5,7,8]";
    char *p = argc > 1 ? argv[1] : c;
    size_t i, offset = 0, na = 0, nb = 0, nchr = strlen (p);
    /* One extra element keeps the VLA size non-zero when the input is an
     * empty string (a zero-length VLA is undefined behaviour). */
    char a[nchr + 1], b[nchr + 1];

    memset (a, 0, sizeof a);  /* zero each VLA */
    memset (b, 0, sizeof b);

    na = str2arr (a, nchr, p, &offset);          /* convert first segment */
    if (offset > nchr)       /* never start the second scan past the terminator */
        offset = nchr;
    nb = str2arr (b, nchr, p + offset, &offset); /* convert second segment */

    /* str2arr stores negative values in plain char, whose signedness is
     * implementation-defined; go through signed char so they print as
     * negative numbers everywhere. */
    for (i = 0; i < na; i++)                     /* output results */
        printf (" a[%2zu] : % d\n", i, (signed char)a[i]);
    putchar ('\n');

    for (i = 0; i < nb; i++)
        printf (" b[%2zu] : % d\n", i, (signed char)b[i]);
    putchar ('\n');

    return 0;
}

/** Convert one bracketed segment of single-digit values into an array.
 *
 *  Scans 's' up to the first ']' (or the end of the string) and stores one
 *  value per digit character in 'd'. A '-' that immediately precedes a digit
 *  makes that value negative; every other non-digit character is treated as
 *  a separator. Digits are never combined: "12" yields 1 and 2.
 *
 *  @param d    destination array; values are stored as char
 *  @param max  capacity of 'd'; values of the segment beyond it are dropped
 *  @param s    string to scan (not modified)
 *  @param ofs  receives the offset, relative to 's', at which the next
 *              segment starts: just past the closing ']', or at the
 *              terminating NUL when the segment has no closing ']'
 *  @return     number of values stored in 'd'; 0 (with '*ofs' untouched)
 *              when an argument is NULL or 's' is empty
 */
size_t str2arr (char *d, size_t max, char *s, size_t *ofs)
{
    if (!d || !s || !ofs || !*s) return 0;  /* validate input */
    size_t di = 0, si = 0;
    int neg = 0;

    while (di < max) {
        /* skip separators up to the next digit, but stop at ']' so that an
         * empty or comma-terminated segment cannot run into the next one */
        while (s[si] && s[si] != ']' && (s[si] < '0' || '9' < s[si])) {
            neg = (s[si] == '-');  /* only a '-' directly before the digit counts */
            si++;
        }
        if (!s[si] || s[si] == ']') break;  /* end of segment or of string */

        d[di++] = (char)(neg ? -(s[si] - '0') : s[si] - '0');  /* convert digit */
        neg = 0;                                               /* reset flag */
        si++;
    }

    /* when 'd' filled up early, move on to the end of the segment so the
     * reported offset still refers to the start of the next one */
    while (s[si] && s[si] != ']') si++;

    /* step over ']' only if it is really there; otherwise stay on the
     * terminator so a follow-up call never reads beyond the string */
    *ofs = (s[si] == ']') ? si + 1 : si;

    return di;      /* return number of conversions */
}

Example Use/Output

$ ./bin/str2arr
 a[ 0] :  1
 a[ 1] :  2
 a[ 2] :  3

 b[ 0] :  5
 b[ 1] :  7
 b[ 2] :  8

$ ./bin/str2arr "[1,2,3,4][5,6,-5,7,-1,8,9,2]"
 a[ 0] :  1
 a[ 1] :  2
 a[ 2] :  3
 a[ 3] :  4

 b[ 0] :  5
 b[ 1] :  6
 b[ 2] : -5
 b[ 3] :  7
 b[ 4] : -1
 b[ 5] :  8
 b[ 6] :  9
 b[ 7] :  2

Look it over, compare this approach to the other answers. In C, you have as much fine-grain-control over how you parse data as you want to exercise. If you have no need to handle negative values, then the implementation is much simpler. Let me know if you have any questions.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM