简体   繁体   中英

Reading in a multi-line string from a file using sscanf in C

I've got a text file that has information about a specific record, and the format of the file looks like this:

Title: Void
Artist: RL Grime
Year: 2014
Genre: Bass
Label: We Did It
Price: 14.95
----
Title: Mssingno EP
Artist: Mssingno
Year: 2013
Genre: Grime / Garage
Label: Goon Club Allstars
Price: 10.00
----

I'm trying to write a program that can parse the data from this using sscanf() and then store each collection into a single struct called Record . Record looks like this:

typedef struct
{
    char    title[80];
    char    artist[80];
    int     year;
    char    genre[80];
    char    label[80];
    double  price;
} Record;

Record record_inventory[MAX_RECORDS];

But I'm wondering what I can do to actually parse this data. I can do a single line, but I don't know how I can do a whole paragraph with sscanf() .

For example, parsing a single line (with the file I'm reading from being ifp) would look like this:

/* Line buffer: each fgets() call stores at most one input line (up to 1023 characters). */
char input[1024];
while(fgets(input, 1024, ifp))
{
    /* Only the "Title:" field is parsed, and always into record_inventory[0].
     * %[^,] reads up to the first comma and carries no field width. */
    sscanf(input, "Title: %[^,]", record_inventory[0].title);
}

But obviously since input would only hold a single line, I wouldn't be able to add on to the sscanf in any way. Is there any efficient way to get four lines with fgets and then parse it besides alternating between fgets and sscanf four times?

#include <stdio.h>
#include <string.h>

#define MAX_RECORDS 64

/* One inventory entry; every text member is a fixed 80-byte, NUL-terminated string. */
typedef struct
{
    char title[80];     /* release title */
    char artist[80];    /* artist name */
    int year;           /* release year */
    char genre[80];     /* genre description */
    char label[80];     /* record label */
    double price;       /* price */
} Record;

Record record_inventory[MAX_RECORDS];

/*
 * Reads "data.txt", gathering the lines of one record until a line starting
 * with "----" is seen, then parses the gathered text with a single sscanf()
 * call into the next free slot of record_inventory.  Afterwards prints the
 * title and price of every record that parsed successfully.
 *
 * Lines that are not followed by a "----" marker before end of file are
 * discarded.  A record whose text does not fit in the gathering buffer is
 * reported as badly formatted instead of overrunning the buffer.
 *
 * Returns 0 on success, 1 if the input file cannot be opened.
 */
int main(void){
    char input[128], rec[6*128] = "";
    size_t used = 0;    /* characters currently stored in rec */
    int overflow = 0;   /* non-zero once the current record no longer fits in rec */
    int n = 0;
    FILE *ifp = fopen("data.txt", "r");

    if(ifp == NULL){
        perror("data.txt");
        return 1;
    }

    while(fgets(input, sizeof input, ifp)){
        if(strncmp(input, "----", 4)==0){
            /* End of record: parse everything gathered so far in one go. */
            if(overflow || 6!=sscanf(rec ,
                "Title: %79[^\n] "
                "Artist: %79[^\n] "
                "Year: %d "
                "Genre: %79[^\n] "
                "Label: %79[^\n] "
                "Price: %lf",
                record_inventory[n].title,
                record_inventory[n].artist,
                &record_inventory[n].year,
                record_inventory[n].genre,
                record_inventory[n].label,
                &record_inventory[n].price)){
                fprintf(stderr, "bad format!\n");
            } else {
                if(++n == MAX_RECORDS){
                    fprintf(stderr, "full.\n");
                    break;
                }
            }
            /* Start gathering the next record from scratch. */
            *rec = '\0';
            used = 0;
            overflow = 0;
        } else {
            /* Append the line only if it (and its terminator) still fits. */
            size_t len = strlen(input);
            if(len < sizeof rec - used){
                memcpy(rec + used, input, len + 1);
                used += len;
            } else {
                overflow = 1;
            }
        }
    }
    fclose(ifp);
    //check print
    for(int i = 0; i < n; ++i){
        printf("%s : %f\n", record_inventory[i].title, record_inventory[i].price);
    }
    return 0;
}

The data you show is nice and consistent — 6 data lines and an end-of-record (EOR) marker, with the fields in the same order. It isn't clear whether it is safe to assume that the data will all be as well disciplined. Let's assume, pro tem, that it is. Then you need to read and accumulate lines up to the EOR, then process the resulting data.

You've also not stipulated whether you're on a platform which supports POSIX getline() . I'm going to assume you are because it makes life simpler. You can do it with fgets() if necessary.

You can read to EOR using code like:

/* Returns the larger of the two sizes (y when they are equal). */
static size_t max(size_t x, size_t y)
{
    if (x > y)
        return x;
    return y;
}

/*
 * Reads lines from fp with getline() and appends them to a single heap
 * buffer until a line made of eor immediately followed by a newline has been
 * appended, or getline() returns -1.
 *
 * Returns the accumulated NUL-terminated text (the EOR line included), which
 * the caller must free(), or a null pointer when no line was read or the
 * buffer could not be grown.
 */
char *get_record(FILE *fp, const char *eor)
{
    const size_t marker_len = strlen(eor);
    char *line = 0;         /* line buffer managed by getline() */
    size_t line_cap = 0;
    char *text = 0;         /* accumulated record text */
    size_t text_len = 0;    /* characters stored in text, terminator excluded */
    size_t text_cap = 0;    /* bytes allocated for text */

    for (;;)
    {
        ssize_t got = getline(&line, &line_cap, fp);
        if (got == -1)
            break;

        /* Grow the output buffer when the new line plus terminator reaches its capacity. */
        size_t needed = text_len + got + 1;
        if (needed >= text_cap)
        {
            size_t new_cap = max(text_len * 2, needed);
            void *grown = realloc(text, new_cap);
            if (grown == 0)
            {
                free(line);
                free(text);
                return 0;
            }
            text = grown;
            text_cap = new_cap;
        }

        /* Copy the line together with its terminating NUL. */
        memmove(text + text_len, line, got + 1);
        text_len += got;

        /* The prefix comparison comes first so line[marker_len] is only read when the prefix matched. */
        int at_eor = strncmp(line, eor, marker_len) == 0 && line[marker_len] == '\n';
        if (at_eor)
            break;
    }

    free(line);
    return text;
}

/* Test harness for get_record() */
int main(void)
{
    char *buffer;

    while ((buffer = get_record(stdin, "----")) != 0)
    {
        printf("[[%s]]\n", buffer);
        free(buffer);
    }

    return 0;
}

Source file getrec.c

That can then be extended to process the structure from the record like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { MAX_RECORDS = 20 };
enum { MAX_TAG = 20 };

/* One inventory entry; every text member is a fixed 80-byte, NUL-terminated string. */
typedef struct {
    char title[80];     /* release title */
    char artist[80];    /* artist name */
    int year;           /* release year */
    char genre[80];     /* genre description */
    char label[80];     /* record label */
    double price;       /* price */
} Record;

static Record record_inventory[MAX_RECORDS];
static size_t n_rec = 0;

/* Returns the larger of the two sizes (y when they are equal). */
static size_t max(size_t x, size_t y)
{
    if (x > y)
        return x;
    return y;
}

extern char *get_record(FILE *fp, const char *eor);
extern int scan_record(const char *buffer, Record *record);
extern void print_record(size_t i, const Record *record);

/*
 * Reads lines from fp with getline() and appends them to a single heap
 * buffer until a line made of eor immediately followed by a newline has been
 * appended, or getline() returns -1.
 *
 * Returns the accumulated NUL-terminated text (the EOR line included), which
 * the caller must free(), or a null pointer when no line was read or the
 * buffer could not be grown.
 */
char *get_record(FILE *fp, const char *eor)
{
    const size_t marker_len = strlen(eor);
    char *line = 0;         /* line buffer managed by getline() */
    size_t line_cap = 0;
    char *text = 0;         /* accumulated record text */
    size_t text_len = 0;    /* characters stored in text, terminator excluded */
    size_t text_cap = 0;    /* bytes allocated for text */

    for (;;)
    {
        ssize_t got = getline(&line, &line_cap, fp);
        if (got == -1)
            break;

        /* Grow the output buffer when the new line plus terminator reaches its capacity. */
        size_t needed = text_len + got + 1;
        if (needed >= text_cap)
        {
            size_t new_cap = max(text_len * 2, needed);
            void *grown = realloc(text, new_cap);
            if (grown == 0)
            {
                free(line);
                free(text);
                return 0;
            }
            text = grown;
            text_cap = new_cap;
        }

        /* Copy the line together with its terminating NUL. */
        memmove(text + text_len, line, got + 1);
        text_len += got;

        /* The prefix comparison comes first so line[marker_len] is only read when the prefix matched. */
        int at_eor = strncmp(line, eor, marker_len) == 0 && line[marker_len] == '\n';
        if (at_eor)
            break;
    }

    free(line);
    return text;
}

/*
 * Checks that data begins (after optional white space) with "<tag>:".
 *
 * tag   expected tag name, without the colon
 * data  text to examine
 *
 * Returns the offset in data just past the colon, or 0 when the tag is
 * absent, differs from tag, is longer than MAX_TAG - 1 characters, or is not
 * followed by a colon.  The offset never steps over the character after the
 * colon, so "Year:2014" and a tag at the very end of the buffer are both
 * handled; callers' conversions skip any white space before the value.
 */
static int scan_tag(const char *tag, const char *data)
{
    /* %n is only stored once the ':' literal has matched; -1 marks "not reached". */
    int pos = -1;
    char fmtstr[MAX_TAG];
    char tagstr[MAX_TAG];

    /* Builds " %<MAX_TAG-1>[^:]:%n" so the tag can never overflow tagstr. */
    snprintf(fmtstr, sizeof(fmtstr), " %%%d[^:]:%%n", MAX_TAG - 1);
    if (sscanf(data, fmtstr, tagstr, &pos) != 1 || pos < 0)
        return 0;
    if (strcmp(tagstr, tag) != 0)
        return 0;
    return pos;
}

/*
 * Parses "<tag>: <text>" from data, storing the text that runs up to the next
 * newline (at most buflen - 1 characters) in buffer as a NUL-terminated string.
 * White space between the colon and the text, newlines included, is skipped.
 *
 * tag     expected tag name, without the colon
 * data    text to parse
 * buffer  destination for the text
 * buflen  size of buffer in bytes; must be at least 2
 *
 * Returns the number of characters of data consumed, or 0 on failure (tag
 * mismatch, no text left to read, or buflen too small to hold any character).
 */
static size_t scan_string(const char *tag, const char *data, char *buffer, size_t buflen)
{
    /* A %[ conversion needs a width of at least 1, plus one byte for the terminator. */
    if (buflen < 2)
        return 0;

    int pos1 = scan_tag(tag, data);
    if (pos1 == 0)
        return 0;

    /* Room for " %", a 20-digit width, "[^\n]", "%n" and the terminator. */
    char fmtstr[32];
    int pos2;
    int fmtlen = snprintf(fmtstr, sizeof(fmtstr), " %%%zu[^\n]%%n", buflen - 1);
    if (fmtlen < 0 || (size_t)fmtlen >= sizeof(fmtstr))
        return 0;
    if (sscanf(data + pos1, fmtstr, buffer, &pos2) != 1)
        return 0;
    return (size_t)(pos1 + pos2);
}

/*
 * Parses "<tag>: <integer>" from data and stores the value in *int_val.
 *
 * Returns the number of characters of data consumed, or 0 when the tag does
 * not match or no integer follows it.
 */
static size_t scan_integer(const char *tag, const char *data, int *int_val)
{
    const int tag_end = scan_tag(tag, data);
    int used = 0;   /* characters consumed by the %d conversion, set through %n */

    if (tag_end != 0 && sscanf(data + tag_end, "%d%n", int_val, &used) == 1)
        return (size_t)(tag_end + used);
    return 0;
}

/*
 * Parses "<tag>: <number>" from data and stores the value in *dbl_val.
 *
 * Returns the number of characters of data consumed, or 0 when the tag does
 * not match or no floating-point number follows it.
 */
static size_t scan_double(const char *tag, const char *data, double *dbl_val)
{
    int value_len = 0;  /* characters consumed by the %lf conversion, set through %n */
    const int value_at = scan_tag(tag, data);

    if (value_at == 0)
        return 0;

    const int converted = sscanf(data + value_at, "%lf%n", dbl_val, &value_len);
    return (converted == 1) ? (size_t)(value_at + value_len) : 0;
}

/*
 * Fills *record from the text in buffer, which must contain the fields
 * Title, Artist, Year, Genre, Label and Price in exactly that order.
 *
 * Returns 0 when all six fields were parsed, -1 as soon as one of them fails;
 * members parsed before the failure keep the values already stored in them.
 */
int scan_record(const char *buffer, Record *record)
{
    const char *cursor = buffer;    /* start of the field parsed next */
    size_t used;                    /* characters consumed by the latest field */

    used = scan_string("Title", cursor, record->title, sizeof(record->title));
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_string("Artist", cursor, record->artist, sizeof(record->artist));
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_integer("Year", cursor, &record->year);
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_string("Genre", cursor, record->genre, sizeof(record->genre));
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_string("Label", cursor, record->label, sizeof(record->label));
    if (used == 0)
        return -1;
    cursor += used;

    used = scan_double("Price", cursor, &record->price);
    return (used == 0) ? -1 : 0;
}

/*
 * Prints record number i to standard output, one labelled field per line,
 * followed by an empty line.
 */
void print_record(size_t i, const Record *record)
{
    printf("Record:  %zu\n"
           "Title:   %s\n"
           "Artist:  %s\n"
           "Year:    %4d\n"
           "Genre:   %s\n"
           "Label:   %s\n"
           "Price:   %.2f\n",
           i,
           record->title,
           record->artist,
           record->year,
           record->genre,
           record->label,
           record->price);
    putchar('\n');
}

int main(void)
{
    char *buffer;
    int rc = 0;

    while ((buffer = get_record(stdin, "----")) != 0 && rc == 0)
    {
        printf("Input %zu: [[%s]]\n", n_rec, buffer);
        rc = scan_record(buffer, &record_inventory[n_rec++]);
        free(buffer);
    }

    for (size_t i = 0; i < n_rec; i++)
        print_record(i, &record_inventory[i]);

    return 0;
}

Example input file data

Title: Void
Artist: RL Grime
Year: 2014
Genre: Bass
Label: We Did It
Price: 14.95
----
Title: Mssingno EP
Artist: Mssingno
Year: 2013
Genre: Grime / Garage
Label: Goon Club Allstars
Price: 10.00
----

Sample output

$ ./getrec < data
Input 0: [[Title: Void
Artist: RL Grime
Year: 2014
Genre: Bass
Label: We Did It
Price: 14.95
----
]]
Input 1: [[Title: Mssingno EP
Artist: Mssingno
Year: 2013
Genre: Grime / Garage
Label: Goon Club Allstars
Price: 10.00
----
]]
Record:  0
Title:   Void
Artist:  RL Grime
Year:    2014
Genre:   Bass
Label:   We Did It
Price:   14.95

Record:  1
Title:   Mssingno EP
Artist:  Mssingno
Year:    2013
Genre:   Grime / Garage
Label:   Goon Club Allstars
Price:   10.00

$

Dealing with variable order of the record elements, or missing elements, or extra elements, is left as an exercise. The scan_tag() function would need to return the tag it finds, rather than checking that it is given the right tag. The calling code would have to be adaptive to what has been found, reporting errors if the same tag appears several times, etc.

The code shown is not good about reporting errors. It spots them OK, but it doesn't report what the trouble was. There are many ways to do that; you'd have to decide what's best for you.

Make sure you understand what is going on throughout this code before handing it in; some of it is not the sort of thing beginners write.

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM