简体   繁体   中英

C parsing data from files

I'm trying to parse some data from a text file in a C program. The text file consists of:

4 4
1 1 0 0
1 1 0 0
0 0 1 1
0 0 1 1

The first two numbers are the number of rows and columns needed for the 2D array that stores the integers. So far my code is:

        /* Open the file named by the first command-line argument for reading. */
        file = fopen(argv[1], "r");
        if (file == NULL)
        {
            perror("File IO error\n");
        }
        else
        {
            /* Process the file one line at a time. */
            while(fgets(line, sizeof(line), file) != NULL)
            {        
                /* A line that yields exactly four integers is printed as a data row. */
                if(sscanf(line, "%d %d %d %d", &a, &b, &c, &d) == 4)
                {
                    printf("%d %d %d %d\n", a, b, c, d);
                }
                else
                {
                    /* Any other line is treated as the "ROW COL" header.
                     * NOTE(review): the result of this sscanf is not checked,
                     * so ROW and COL are printed even if nothing was parsed. */
                    sscanf(line, "%d %d", &ROW, &COL);
                    printf("ROW: %d COL: %d\n", ROW, COL);
                }

The problem I'm having is that there are other text files that need to be read as input. They do not have the same number of rows and columns as the one above. E.g.:

15 15
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 1 1 0 0 0 1 1 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 0 0 0 1 0 1 0 0 0 0 1 0
0 1 0 0 0 0 1 0 1 0 0 0 0 1 0
0 1 0 0 0 0 1 0 1 0 0 0 0 1 0
0 0 0 1 1 1 0 0 0 1 1 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 1 1 0 0 0 1 1 1 0 0 0
0 1 0 0 0 0 1 0 1 0 0 0 0 1 0
0 1 0 0 0 0 1 0 1 0 0 0 0 1 0
0 1 0 0 0 0 1 0 1 0 0 0 0 1 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 1 1 0 0 0 1 1 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

As my code only handles 4 integers per line, it wouldn't work on a larger or smaller file. How would I make it dynamic and parse according to the size? Thanks.

Read the data one integer at a time into a buffer. If you really want a 2D array, you will have to either copy the data or pay attention to the first two data points and allocate the array after you read them. However, the practice of prefixing a data stream with the number of elements that will be in the stream is an anti-pattern, and you don't really want a 2D array. You just want to be able to write data[i][j] to access the data in the buffer as if it were a 2D array. Also, don't bother with file I/O. Read from your input stream and write to your output stream and let the shell handle redirections (e.g., instead of cmd input-file, execute cmd < input-file). Dynamically growing your buffers is a basic task that you should practice. For example:

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>

/* Growable buffer of ints, filled one element at a time by push(). */
struct int_buf {
    int *data;   /* element storage; NULL until the first push() */
    size_t cap;  /* number of elements `data` currently has room for */
    size_t len;  /* number of elements stored so far */
};


/* Allocation helper; its definition appears later in this file. */
void *xrealloc(void *b, size_t num, size_t siz);

/*
 * Append v to the end of b, enlarging the storage in steps of 128
 * elements whenever there is no free slot left.
 */
static void
push(struct int_buf *b, int v)
{
    while( b->len >= b->cap ){
        b->cap += 128;
        b->data = xrealloc(b->data, b->cap, sizeof *b->data);
    }
    b->data[b->len] = v;
    b->len += 1;
}


/* Write msg to stderr exactly as given, then exit with EXIT_FAILURE. */
static void
die(const char *msg)
{
    fprintf(stderr, "%s", msg);
    exit(EXIT_FAILURE);
}


/*
 * Read "<rows> <columns>" followed by whitespace-separated integers from
 * stdin, verify that exactly rows * columns values were supplied, and print
 * them one row per line.  Any malformed input terminates the program with
 * EXIT_FAILURE through die().
 */
int
main(int argc, char **argv)
{
    struct int_buf b = {NULL, 0, 0};
    int v;
    int rows, columns;

    /* Input comes from stdin only; the command line is not used. */
    (void)argc;
    (void)argv;

    if( scanf("%d %d", &rows, &columns) != 2 ){
        die("invalid row/column count\n");
    }
    /* A non-positive dimension would give the array type declared below a
     * non-positive size, which is undefined behavior. */
    if( rows < 1 || columns < 1 ){
        die("invalid row/column count\n");
    }
    while( scanf("%d", &v) == 1 ){
        push(&b, v);
    }
    if( ! feof(stdin) ){
        die(ferror(stdin) ? "input error\n" : "invalid data\n");
    }
    /* Check the element count by division so that rows * columns is never
     * computed in int (signed overflow) or compared across signedness. */
    if( b.len % (size_t)columns != 0
        || b.len / (size_t)columns != (size_t)rows ){
        die("inconsistent data\n");
    }
    /* View the flat buffer as rows x columns without copying it. */
    int (*data)[columns] = (void *)b.data;

    for( int r = 0; r < rows; r += 1 ){
        printf(" row %3d: ", r);
        for( int c = 0; c < columns; c += 1 ){
            printf(" %3d", data[r][c]);
        }
        putchar('\n');
    }

    free(b.data);
    return 0;
}


/*
 * Resize the array b to hold num elements of siz bytes each.
 *
 * Behaves like realloc(b, num * siz) but never returns NULL: if the byte
 * count would not fit in size_t, or if realloc fails, a message is written
 * to stderr and the process exits with EXIT_FAILURE.
 */
void *
xrealloc(void *b, size_t num, size_t siz)
{
    /* Without this check num * siz could wrap around and silently produce
     * an allocation smaller than the caller expects. */
    if( siz != 0 && num > (size_t)-1 / siz ){
        fputs("realloc: allocation size overflow\n", stderr);
        exit(EXIT_FAILURE);
    }
    void *p = realloc(b, num * siz);
    if( p == NULL ){
        perror("realloc");
        exit(EXIT_FAILURE);
    }
    return p;
}

In this particular use case, the first two data points are not needed. For this sample, we've used them so that we don't need to worry about newlines in the input stream and can use scanf. (scanf is a terrible tool that should never be used, but that's a different discussion. Here I'll just point out that on certain inputs, the above program will exhibit undefined behavior. You can avoid that with "%12d" or similar, but then you need to know the maximum size of an integer to decide what value to use. The language guarantees are very conservative, so unless you want to dynamically build a format string or generate platform-specific code, you would have to use something ridiculously small like %4d.) We could just as easily count newlines as we read the stream and just discard the first two data points.

If your text file is formatted, you can use strtok and strtol:

#include<stdio.h>
#include<string.h>
#include <stdlib.h>
/* Split a space-separated string into tokens and print each one as an int. */
int main() {
    char payload[] = "1 2 3 4";

    /* strtok modifies payload in place and returns NULL once no tokens remain. */
    for (char *tok = strtok(payload, " "); tok != NULL; tok = strtok(NULL, " ")) {
        long parsed = strtol(tok, NULL, 10);
        printf("%d\n", (int)parsed);
    }
    return 0;
}

You can just call fscanf in a loop like this:

int rows, cols;
if(fscanf(file, "%d %d", &rows, &cols) != 2) {
    // error
}

int num;
for(int i = 0; i<rows; ++i) {
    for(int j = 0; j < (cols - 1); ++j) {
        if(fscanf(file, "%d", &num) == 1) {
            printf("%d ", num);
        }
        else { /* error */ }
    }
    if(fscanf(file, "%d", &num) == 1) {
        printf("%d\n", num); // last col handled differently
    } 
    else { /* error */ }
}

If you want to read a whole row at once, you can just use an int array (no need for a string, as in your example):

int* row = malloc(sizeof(int) * cols); // allocate memory for array
if(row == NULL) {/* error */}
for(int i = 0; i<rows; ++i) {
    for(int j = 0; j < cols; ++j) {
        if(fscanf(file, "%d", &row[j]) != 1) { /* error */}
    }
    // do what you want with your row
}
free(row); // release array memory

In order to read files with different number of rows and columns, I suggest that you allocate memory dynamically after having read the first line containing the number of rows and columns.

Example:

#include <stdio.h>
#include <stdlib.h> // malloc/free

/*
 * Read a matrix from the text file named by argv[1] and print it.
 *
 * The file starts with two positive integers, the row and column counts,
 * followed by ROW * COL integers.  Returns 0 on success and 1 on a usage
 * error, an I/O error, a malformed file, or an allocation failure.  The
 * file is closed on every path after it has been opened.
 */
int main(int argc, char* argv[]) {
    if(argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "prog");
        return 1;
    }

    FILE *file = fopen(argv[1], "r");
    if (file == NULL) {
        perror("fopen");
        return 1;
    }

    int ROW, COL;
    if(fscanf(file, "%d %d", &ROW, &COL) != 2 || ROW < 1 || COL < 1) {
        fprintf(stderr, "invalid file format\n");
        fclose(file);
        return 1;
    }

    // refuse dimensions whose total byte size would not fit in size_t,
    // otherwise the multiplication in the malloc call below could wrap
    if((size_t)COL > (size_t)-1 / sizeof(int) ||
       (size_t)ROW > (size_t)-1 / ((size_t)COL * sizeof(int))) {
        fprintf(stderr, "matrix too large\n");
        fclose(file);
        return 1;
    }

    // allocate the needed memory to store the data:
    // (explicit fclose calls are used instead of a goto-based cleanup
    // because a jump may not cross this variably-modified declaration)
    int (*arr)[COL] = malloc(ROW * sizeof *arr);
    if(arr == NULL) {
        perror("malloc");
        fclose(file);
        return 1;
    }

    // read the data from file into your dynamically allocated array:
    for(int row = 0; row < ROW; ++row) {
        for(int col = 0; col < COL; ++col) {
            if(fscanf(file, " %d", &arr[row][col]) != 1) {
                fprintf(stderr, "invalid file format\n");
                fclose(file);
                free(arr);
                return 1;
            }
        }
    }
    fclose(file);

    // display the result:
    printf("ROW: %d COL: %d\n", ROW, COL);
    for(int row = 0; row < ROW; ++row) {
        for(int col = 0; col < COL; ++col) {
            printf("%d ", arr[row][col]);
        }
        putchar('\n');
    }

    free(arr); // release the memory allocated dynamically
    return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM