简体   繁体   English

从CSV文件中读取整数值,如何仅获取记录的最后两个值?

[英]read integer values from a CSV file, how to get only the last two values of the record?

The sample record will be like this, 样本记录将是这样,

14/11/2014,Sh2345,423,10
12/12/2014,AV2345,242,20

From the above record I need only 根据以上记录,我只需要

423,10
242,20

The code below gives me the total row and column counts. 下面的代码可以得到总的行数和列数。

/*
 * Counts rows and columns of the comma-separated text read from fp.
 * part, token, idx, rowIndex, columnIndex and fp are declared outside this fragment.
 */
rowIndex = 0;
columnIndex = 0;
/* NOTE(review): 1024 is presumably the size of part - confirm against its declaration */
while(fgets(part,1024,fp) != NULL){
    /* reset so the first strtok call for this line receives part instead of NULL */
    token = NULL;

    /* token is NULL only before the first call, so strtok gets part once and NULL afterwards */
    while((token = strtok((token == NULL)?part:NULL,",")) != NULL){
        /* columns are counted only while no newline has been seen yet */
        if(rowIndex == 0){ 
            columnIndex++;
        }
        /* a token containing '\n' marks the end of a row */
        for(idx = 0;idx<strlen(token);idx++){
            if(token[idx] == '\n'){ 
                rowIndex++;
                break;
            }
        }
    } 
}

If you want to use strtok, which I believe is the right way to do this kind of thing (fscanf becomes very problematic when the input is invalid), then I think this is the way; the code uses strtok_r, the reentrant variant: 如果您想使用strtok(我认为这是处理此类问题的正确方法,因为fscanf在输入无效时会很成问题),那么我认为可以这样做;代码中使用的是可重入版本strtok_r:

/*
 * Reads fp line by line and prints the third and fourth comma-separated
 * fields of every line that has exactly four fields.
 * part, fp, values, rowIndex and columnIndex are declared outside this fragment.
 * NOTE(review): values needs one slot for every field after the second one;
 * its size is not visible here, so confirm it against the declaration.
 */
rowIndex = 0;
while (fgets(part, sizeof part, fp) != NULL)
{
    char  *token;
    size_t partLength;
    char  *saveptr; /* strtok_r keeps its position inside part here */

    partLength = strlen(part);
    /* a trailing newline means fgets delivered a complete line */
    if ((partLength > 0) && (part[partLength - 1] == '\n'))
        rowIndex++;

    columnIndex = 0;
    /* the first field is consumed here and is never converted or counted */
    token       = strtok_r(part, ",", &saveptr);
    while ((token = strtok_r(NULL, ",", &saveptr)) != NULL)
    {
        /* columnIndex 0 is the second field; numeric fields start at columnIndex 1 */
        if (columnIndex >= 1)
        {
            char *endptr;

            values[columnIndex - 1] = strtol(token, &endptr, 10);
            /*
             * The check lives inside this branch because endptr is only set by
             * strtol; outside it endptr would be read uninitialized and
             * values[-1] could be written.  A field is rejected when it has no
             * digits or when anything other than the line end follows them.
             */
            if ((endptr == token) || ((*endptr != '\0') && (*endptr != '\n')))
                values[columnIndex - 1] = -1; /* some invalid value (if it's possible) */
        }
        columnIndex++;
    }
    /* if we have columnIndex == 3, then we've read the two values */
    if (columnIndex == 3)
        printf("(%d, %d)\n", values[0], values[1]);
    /* the first field was consumed before the loop, so add it to the count */
    columnIndex++;
}

If a line is too long to fit in part (so that fgets cuts it off somewhere between two , separators), you are going to need a different approach; but as long as every line fits in part, you are OK. 如果某一行太长、part放不下(也就是fgets会在两个 , 分隔符之间把这一行截断),您就需要换一种方法;但只要每一行都能放进part,就没有问题。

To read the values into an array, maybe this could work: 要将值读入数组,也许可以这样做:

/*
 * Reads a comma-separated text file into a heap-allocated matrix of ints.
 *
 * filename         path of the file to read
 * readRowCount     receives the number of rows stored, or -1 on failure
 * readColumnCount  receives the number of columns every stored row is
 *                  guaranteed to have (the narrowest row wins), or -1 on failure
 * skipColumns      number of leading fields to ignore on every line;
 *                  negative values are treated as 0
 *
 * Returns the matrix (release it with free() on every row and then on the
 * table), or NULL when the file cannot be opened, memory runs out, or no
 * line contained any data column.
 *
 * Fields are split the way strtok splits them: a run of commas counts as a
 * single separator, so empty fields are not seen.  A field that is not a
 * plain decimal integer is stored as -1.  Lines that yield no data column
 * (blank lines, lines with only skipped fields) do not produce a row.
 * Lines longer than the 255 characters that fit in the buffer are read in
 * several pieces and each piece is treated as a line of its own.
 */
int **fileToMatrix(const char *const filename, int *readRowCount, int *readColumnCount, int skipColumns)
{
    char  part[256];
    FILE *file;
    int   rowIndex;
    int   columnCount;
    int   index;
    int **values;

    *readRowCount    = -1;
    *readColumnCount = -1;

    file = fopen(filename, "r");
    if (file == NULL)
        return NULL;

    if (skipColumns < 0)
        skipColumns = 0;

    values      = NULL; /* calling realloc, it behaves like malloc if ptr argument is NULL */
    rowIndex    = 0;
    columnCount = -1;   /* -1 until the first row has been stored */
    while (fgets(part, sizeof part, file) != NULL)
    {
        char *field;
        int  *currentRow;
        int   fieldIndex;
        int   stored;
        int **pointer;

        /* drop "\n" and "\r\n" so the last field converts like every other one */
        part[strcspn(part, "\r\n")] = '\0';

        currentRow = NULL;
        fieldIndex = 0;
        stored     = 0;
        field      = part + strspn(part, ",");
        while (*field != '\0')
        {
            char *fieldEnd;
            char *next;

            /* locate the next field before the separator is overwritten */
            fieldEnd  = field + strcspn(field, ",");
            next      = fieldEnd + strspn(fieldEnd, ",");
            *fieldEnd = '\0';

            if (fieldIndex >= skipColumns)
            {
                char *endptr;
                long  parsed;
                int  *grown;

                parsed = strtol(field, &endptr, 10);
                grown  = realloc(currentRow, (size_t)(stored + 1) * sizeof *grown);
                if (grown == NULL)
                {
                    /* this row is not in values yet, so release it here */
                    free(currentRow);
                    goto abort;
                }
                currentRow = grown;
                /* no digits at all, or characters left over: store the invalid marker */
                if ((endptr == field) || (*endptr != '\0'))
                    currentRow[stored] = -1;
                else
                    currentRow[stored] = (int)parsed;
                stored++;
            }
            fieldIndex++;
            field = next;
        }

        /* nothing to keep on this line */
        if (stored == 0)
            continue;

        pointer = realloc(values, (size_t)(rowIndex + 1) * sizeof *pointer);
        if (pointer == NULL)
        {
            free(currentRow);
            goto abort;
        }
        values           = pointer;
        values[rowIndex] = currentRow;
        rowIndex++;

        /* report the narrowest row so callers never index past a short one */
        if ((columnCount < 0) || (stored < columnCount))
            columnCount = stored;
    }
    if (columnCount < 0)
        columnCount = 0;

    fprintf(stderr, "%d rows and %d columns parsed\n", rowIndex, columnCount);
    fclose(file);

    *readRowCount    = rowIndex;
    *readColumnCount = columnCount;

    return values;

abort:
    *readRowCount    = -1;
    *readColumnCount = -1;

    /* every entry below rowIndex is a completed row */
    for (index = rowIndex - 1 ; index >= 0 ; index--)
        free(values[index]);
    free(values);

    fclose(file);
    return NULL;
}

/*
 * Frees the first `rows` row pointers of matrix, then the row table itself.
 * The columns argument is accepted but not used.
 */
void freeMatrix(int **matrix, int rows, int columns)
{
    int **cursor    = matrix;
    int   remaining = rows;

    while (remaining > 0)
    {
        free(*cursor);
        cursor++;
        remaining--;
    }
    free(matrix);
}

/*
 * Writes the matrix to stdout, one line per row, each value right-aligned
 * in a field of width 8.
 */
void printMatrix(int **matrix, int rows, int columns)
{
    int r = 0;

    while (r < rows)
    {
        const int *cells = matrix[r];
        int        c     = 0;

        while (c < columns)
        {
            printf("%8d", cells[c]);
            c++;
        }
        printf("\n");
        r++;
    }
}

/*
 * Loads "data.dat" with the first two columns skipped, prints the resulting
 * matrix and releases it.  Always returns 0, even when loading fails.
 */
int main()
{
    int   rows;
    int   columns;
    int **matrix = fileToMatrix("data.dat", &rows, &columns, 2);

    /* nothing to print or free when loading failed */
    if (matrix == NULL)
        return 0;

    printMatrix(matrix, rows, columns);
    freeMatrix(matrix, rows, columns);
    return 0;
}

You should also note that fields in a CSV file are sometimes wrapped in " or ' quotes; you might want to strip them from each token before passing it to strtol, so that the conversion does not fail. 您还应该注意,CSV文件中的字段有时会带有 " 或 ' 引号;您可能需要在把每个令牌传给strtol之前先去掉这些引号,以避免转换失败。

/* part and fp are declared outside this fragment */
int v1, v2;
while(fgets(part,1024,fp) != NULL){
    /* each "%*[^,]," discards one field; sscanf returns how many of v1/v2 it filled */
    if(sscanf(part, "%*[^,],%*[^,],%d,%d", &v1, &v2) != 2)//skip 2 field
        continue;//malformed line: v1/v2 would be stale or uninitialized
    //do stuff .. printf("%d,%d\n", v1, v2);
}
/*
 * Looks for a row whose first cell equals Checkvalue, i.e. checks whether
 * the area code already exists in the matrix.
 *
 * Matrix      table of row pointers; may be NULL when there are no rows
 * Checkrow    number of populated rows to search (rows 0 .. Checkrow - 1)
 * Checkvalue  value to compare with cell 0 of every row
 *
 * Returns the index of the first matching row, or -1 when no row matches.
 */
int CheckMatrix(int **Matrix, int Checkrow, int Checkvalue)
{
    int i;

    if (Matrix == NULL)
        return -1;

    for (i = 0; i < Checkrow; i++)
    {
        if (Matrix[i][0] == Checkvalue)
            return i;
    }
    /* give up only after every row has been compared */
    return -1;
}

/* Returns the index of the row whose first cell is areaCode, or -1 if none of the rowCount rows has it. */
static int findAreaRow(int **rows, int rowCount, int areaCode)
{
    int i;

    for (i = 0; i < rowCount; i++)
    {
        if (rows[i][0] == areaCode)
            return i;
    }
    return -1;
}

/*
 * Reads a comma-separated file and totals the fourth field (distance) per
 * distinct value of the third field (area code).
 *
 * filename      path of the file to read
 * readRowCount  receives the number of distinct area codes, or -1 on failure
 *
 * Returns a table with one row of three ints per area code, in order of
 * first appearance: [0] area code, [1] sum of the distances, [2] number of
 * lines that carried the code.  Every row and the table itself are
 * allocated with malloc/realloc and are released with free().
 * Returns NULL when the file cannot be opened, memory runs out, or no line
 * could be parsed.
 *
 * Lines that do not provide two leading fields followed by two integers are
 * skipped.  The row search is done by a private helper so this function
 * does not depend on any other definition in the file.
 */
int **fileToMatrix(const char *const filename, int *readRowCount)
{
    char  part[256];
    FILE *file;
    int   rowIndex;
    int   index;
    int **values;

    *readRowCount = -1;

    file = fopen(filename, "r");
    if (file == NULL)
        return NULL;
    values   = NULL; /* calling realloc, it behaves like malloc if ptr argument is NULL */
    rowIndex = 0;
    while (fgets(part, sizeof part, file) != NULL)
    {
        int v1;
        int v2;
        int existing;

        /* skip 2 fields to get the area code and the distance */
        if (sscanf(part, "%*[^,],%*[^,],%d,%d", &v1, &v2) != 2)
            continue;

        existing = findAreaRow(values, rowIndex, v1);
        if (existing != -1)
        {
            /* known area code: add the distance and count this occurrence */
            values[existing][1] += v2;
            values[existing][2] += 1;
        }
        else
        {
            /* new area code: append a row of its own */
            int **pointer;
            int  *newRow;

            pointer = realloc(values, (size_t)(rowIndex + 1) * sizeof *pointer);
            if (pointer == NULL)
                goto abort;
            values = pointer;

            newRow = malloc(3 * sizeof *newRow);
            if (newRow == NULL)
                goto abort;
            newRow[0] = v1;
            newRow[1] = v2;
            newRow[2] = 1;

            values[rowIndex] = newRow;
            rowIndex++;
        }
    }
    fclose(file);

    *readRowCount = rowIndex;
    return values;

abort:
    *readRowCount = -1;

    /* every entry below rowIndex is a fully built row */
    for (index = rowIndex - 1 ; index >= 0 ; index--)
        free(values[index]);
    free(values);

    fclose(file);
    return NULL;
}

/* Frees rows 0 .. rows - 1 of matrix and then the table of row pointers. */
void freeMatrix(int **matrix, int rows)
{
    int **current = matrix;
    int **end     = matrix;

    /* only form the end pointer when there is at least one row to walk */
    if (rows > 0)
        end = matrix + rows;

    for (; current != end; ++current)
        free(*current);
    free(matrix);
}

/*
 * Prints one line per row: the three stored cells, each followed by " |",
 * and then the mean, computed as cell 1 divided by cell 2.
 *
 * matrix  table of rows holding at least three ints each
 * rows    number of rows to print
 *
 * A row whose cell 2 is zero prints a mean of 0 instead of dividing by zero.
 */
void printMatrix(int **matrix, int rows)
{
    int row;
    int column;

    for (row = 0 ; row < rows ; row++)
    {
        double mean;

        /* only three cells are stored; the fourth printed column is derived */
        for (column = 0 ; column < 3 ; column++)
            printf("%d |", matrix[row][column]);

        if (matrix[row][2] != 0)
            mean = (double)matrix[row][1] / (double)matrix[row][2];
        else
            mean = 0.0;
        printf("%f", mean);

        printf("\n");
    }
}




# include <stdio.h>
/*
 * Loads "data.dat", prints a header followed by one line per matrix row,
 * and releases the matrix.  Always returns 0, even when loading fails.
 */
int main()
{
    int **matrix;
    int   rows;

    matrix = fileToMatrix("data.dat", &rows);
    if (matrix != NULL)
    {
        /* each header line ends with a newline so the first row starts on its own line */
        printf("|AreaCode|Total Distance|Area Count|Mean\n");
        printf("------------------------------------------\n");
        printMatrix(matrix, rows);
        freeMatrix(matrix, rows);
    }

    return 0;
}

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM