如何读取文本文件中的逗号分隔行并将其字段插入到结构指针数组中？

Question

I've been trying to figure this one out for a while now, and I feel like I have to be close.一段时间以来，我一直试图弄清楚这个问题，我觉得我应该已经很接近答案了。 Basically, I have a data file containing various country records separated by newlines.基本上，我有一个数据文件，其中包含以换行符分隔的各个国家/地区记录。 Each record contains comma-separated fields, of which I am trying to extract certain ones.每条记录都包含逗号分隔的字段，我试图从中提取某些字段。

For example (as a single line):例如（作为单行）：

60,AFG,Afghanistan,Asia,Southern and Central Asia,652090,1919,22720000,45.9,5976,Afganistan/Afqanestan,Islamic Emirate,Mohammad Omar,1,AF

Each one of these lines will make up a struct.这些行中的每一行都将构成一个结构体。 Essentially, I want to read each one of these lines and insert it into an array of struct pointers (i.e. dynamically allocated).本质上，我想读取这些行中的每一行并将其插入到结构体指针数组中（即动态分配）。 I also only want specific fields.我也只想要特定的字段。 When I "tokenize" the line I want the fields for code, name, population, and life expectancy, respectively:当我对该行进行“分词”时，我想依次取得代码、名称、人口和预期寿命这几个字段：

AFG, Afghanistan, 22720000, 45.9 AFG, 阿富汗, 22720000, 45.9

My thought was to use fgets() to read each line in the file, and in a loop malloc() some memory for the pointers, tokenize on the fields I want, then insert.我的想法是使用 fgets() 读取文件中的每一行，并在循环中用 malloc() 为指针分配内存，提取我想要的字段，然后插入。 However, something that I'm doing must be wrong, as various tests don't seem to show anything in my output.但是，我一定是哪里做错了，因为各种测试似乎都没有在我的输出中显示任何内容。

Here is my work thus far.这是我迄今为止的工作。 I would appreciate any and all help.我将不胜感激任何帮助。

#include "allheaders.h" // contains all common headers for personal use

#define BUF_SIZE 512
#define NUM_RECS 238

typedef struct {
   char code[4];
   char name[40];
   int population;
   float lifeExpectancy;
} Country;

typedef Country *countryPtr;

int main( int argc, const char* argv[] ) {

/* Opening the file */
FILE *filePtr;  // pointer to file
if ((filePtr = fopen("AllCountries.dat", "r")) == NULL) {   // if couldn't open file
    printf("Error opening file\n"); // error message
    exit(1);
}

/* Reading the file */
char buffer[BUF_SIZE];  // buffer to read
int index = 0;
char *token;
countryPtr *myCountries = malloc(sizeof(*myCountries) * NUM_RECS);
for(int i = 0; i < NUM_RECS; ++i) {
    myCountries[i] = malloc(sizeof(*myCountries[i]));
}

while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {

    token = strtok(buffer,",");
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->code, token);
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->name, token);
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    myCountries[index]->population = atoi(token);
    token = strtok(NULL, ",");
    myCountries[index]->lifeExpectancy = atof(token);
    //printf("%s", buffer);
    index++;
}

printf("%s", myCountries[1]->code); // test?
free(myCountries);

} }

Answer 1

Have a look at the following.看看下面的内容。 In the first instance you will need to do some work to improve the areas marked NYI首先，您需要做一些工作来改善标记为 NYI 的区域

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

/* Size in bytes of the line buffer that main() passes to fgets(). */
#define BUF_SIZE 512
/* Capacity of the record array that main() allocates. */
#define NUM_RECS 238

/* One country record; holds the four values main() extracts from each line. */
typedef struct {
  /* Country code: room for 3 characters plus the '\0'. */
  char code[4]; // NYI - magic numbers
  /* Country name: room for 40 characters plus the '\0'. */
  char name[41]; // NYI - magic numbers
  /* Population, filled in from the text field via atoi(). */
  int population; // NYI - what if atoi fails?
  /* Life expectancy, filled in from the text field via atof(). */
  float lifeExpectancy; // NYI - what if atof fails?
} Country;

/* Pointer-to-record alias; main() keeps an array of these. */
typedef Country* countryPtr;

int main( int argc, const char* argv[] ) {
  /* Opening the file */
  FILE *filePtr;  // pointer to file
  if ((filePtr = fopen("a.txt", "r")) == NULL) {   // if couldn't open file
    printf("Error opening file\n"); // error message
    exit(1);
  }

  /* Reading the file */
  char buffer[BUF_SIZE];  // buffer to read
  int index=0;
  char *token; // NYI - initial value
  countryPtr* myCountries = calloc(NUM_RECS, sizeof(countryPtr));
  for(int i = 0; i < NUM_RECS; ++i) {
    myCountries[i] = calloc(1, sizeof(Country));
  }

  while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {
    // NYI - magic lengths / overflow strcpy targets

    token = strtok(buffer,","); // NYI - This is probably not the best way to do this. At least fold into a loop.
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->code, token);
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->name, token);
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");

    myCountries[index]->population = atoi(token); // NYI - atoi failure
    token = strtok(NULL, ",");
    myCountries[index]->lifeExpectancy = atof(token); // NYI - atof failure
    printf("%s", buffer);
    index++;
  }

  printf("%s\n", myCountries[0]->code); // test? NYI - need more proof
  free(myCountries); // NYI - this is a sequence - need to free each of the new elements 
}

Answer 2

I took a different approach to solving it based on your code and data file.我根据您的代码和数据文件采用了不同的方法来解决它。 I tested it.我测试了它。 It works with a file of the record type you showed.它适用于您显示的记录类型的文件。 Hopefully it will explain some things and make your work easier and give you a good place to work from.希望它能解释一些事情，让您的工作更轻松，并为您提供一个工作的好地方。

As a general principle, I don't like writing programs that have to pre-count (time-consuming) or know in advance the number of records in a file, except perhaps in rare cases.作为一般原则，我不喜欢编写必须预先统计（耗时）或预先知道文件中记录数的程序，除非在极少数情况下。 So when reading files I prefer to allocate memory as I go.所以在读取文件时，我更喜欢边读边分配内存。 Now if there's a big file and a lot of data, then you have to come up with a better memory management scheme than to keep it all in memory.当然，如果文件很大、数据很多，那么您必须想出一个更好的内存管理方案，而不是将它们全部保存在内存中。 At some point you're better off going with a canned db solution of some sort.到了一定规模，您最好使用某种现成的数据库解决方案。 MySQL, an API, library, parser, etc... but this should work for small files. MySQL、API、库、解析器等……但这应该适用于小文件。

Usually in C on UNIX, exit(0) means success and a non-zero status such as exit(-1) means failure.通常在 UNIX 上的 C 中，exit(0) 表示成功，而 exit(-1) 这样的非零状态表示失败。 Also since your country codes were 3 characters, the field to hold it has to be at least 4 characters for the trailing '\0'.此外，由于您的国家/地区代码是 3 个字符，保存它的字段至少需要 4 个字符，以容纳结尾的 '\0'。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

// Parsing limits
// MAXRECL   - size in bytes of the line buffer passed to fgets()
// MAXFIELDS - number of field buffers available per record
// MAXFIELDL - size in bytes of each field buffer
#define MAXRECL   512
#define MAXFIELDS 100
#define MAXFIELDL 80

// Field indices (0-based positions within a comma-separated record)

#define COUNTRY_CODE    1
#define COUNTRY_NAME    2
#define POPULATION      7
#define LIFE_EXPECTANCY 8

// Maximum characters in a country code / name, not counting the '\0'
#define CCMAX           3
#define CNMAX           40

// One country record; also a node of the singly linked list built by main().
typedef struct Country {
   // Next record in the list; left zeroed by calloc() for the last node
   struct Country *next;
   char  code[CCMAX + 1];  // (Need room for trailing '\0')
   char  name[CNMAX + 1];  // (Need room for trailing '\0')
   // Population, converted from the text field with atoi()
   int   population;
   // Life expectancy, converted from the text field with atof()
   float lifeExpectancy;
} country_t;

// Head of the record list; main() points it at the first record read
country_t *countryRecords;

int main( int argc, const char* argv[] ) {

    FILE *fp;
    if ((fp = fopen("AllCountries.dat", "r")) == NULL) {  
        printf("Error opening file\n"); 
        exit(-1);
    }
    int totalCountries = 0;
    char buf[MAXRECL];
    char fields[MAXFIELDS][MAXFIELDL];
    country_t *prev_country = NULL;
    while (fgets(buf, MAXRECL, fp) != NULL) {
        ++totalCountries;      
        country_t *country = calloc(sizeof(struct Country), 1);
        if (country == NULL) {
            fprintf(stderr, "Out of memory\n");
            exit(-1);
        }
        char *field = strtok(buf, ",");
        int i = 0;
        while(field != NULL) {
          strncpy(fields[i++], field, MAXFIELDL);
          field = strtok(NULL, ",");
        }        
        strcpy(country->code, fields[COUNTRY_CODE]);
        strcpy(country->name, fields[COUNTRY_NAME]);
        country->population = atoi(fields[POPULATION]);
        country->lifeExpectancy = atof(fields[LIFE_EXPECTANCY]);

        if (countryRecords == NULL)
            countryRecords = country;
        else 
            prev_country->next = country;
        prev_country = country;  
    }
    printf("Total countries: %d\n", totalCountries);

    country_t *country = countryRecords;
    while(country != NULL) {
        printf("%3s %30s  Population: %7d Life Expectancy: %5.2f\n",
            country->code, country->name, country->population, country->lifeExpectancy); 
        country_t *prev_country = country;
        country = country->next;
        free(prev_country);
    }
    printf("Done\n");
    exit(0);
}

如何读取文本文件中的逗号分隔行并将其字段插入到结构指针数组中？

问题描述

2 个解决方案

解决方案1
1 2017-01-31 04:28:47

解决方案2
0 已采纳 2017-01-31 05:07:17

如何读取文本文件中的逗号分隔行并将其字段插入到结构指针数组中？

问题描述

2 个解决方案

解决方案1 1 2017-01-31 04:28:47

解决方案2 0 已采纳 2017-01-31 05:07:17

解决方案1
1 2017-01-31 04:28:47

解决方案2
0 已采纳 2017-01-31 05:07:17