简体   繁体   中英

reading lines of fixed size from a file in c

I want to process a file line by line in C. Every line in the file must be exactly 100 characters long; if a line exceeds this, or is empty, I want to print the number of the line in error and continue to the next line.

I'm using this, but it doesn't work:

int maxLineLen = 101; // 100 + 1 for the '\n' end of line
// NOTE(review): fgets() stores at most maxLineLen - 1 characters followed by a
// terminating '\0', so with 101 a full 100-character line leaves no room for
// its '\n' in the same read.
char myBuffer[101];
FILE *myFile;

// NOTE(review): the result of fopen() is used below without a NULL check.
myFile = fopen("dataFile.txt", "r");

// Read the file chunk by chunk until fgets() reports end of file or an error.
while (fgets(myBuffer, maxLineLen, myFile) != NULL) {
     // I can't figure out how to detect and print empty or error lines
}

Thanks for the help.

Edit: I added this sample of my file:

                                                            // Empty line : Wrong line
FirstName-Paolo-LastName-Roberto-Age-23-Address-45,abcdefghijklmnopqrst-CustomerId-xxxxxxxxxxxxxxxx // Correct line
FirstName-Juliana-LastName-Mutti-Age-35-Address-28,abcdefghijklmnopqrst-CustomerId-xxxxxxxxxxxxxxxABCDEFGHIJx // Exeed the length : Wrong line
FirstName-David-LastName-Lazardi-Age-59-Address-101,abcdefghijklmnopqrst-CustomerId // Short length : Wrong line

When I run my program I should get:

Line 1 : ERROR
Line 3 : ERROR
Line 4 : ERROR

Try this:

int maxLineLen = 101; // 100 + 1 for the '\n' end of line
int i = 0;
int len;
char myBuffer[101];
FILE *myFile;

myFile = fopen("dataFile.txt", "r");

while (fgets(myBuffer, maxLineLen, myFile) != NULL) {
     i++;
     len = strlen(myBuffer);
     if(len != 100) {
         printf("Error on line %u : expected 100 but got %u\n", i, len);
     }
}

http://www.cplusplus.com/reference/clibrary/cstdio/fgets/

根据需要尝试fgetc() (或fgetwc() )。

Since you need to detect both underlength and overlength lines reliably, and resynchronize your input after either, it is probably easiest to write a function that uses getc() to read the data.

Your standard function options include:

  • fgets() — won't read too much data, but you'd have to determine whether it got a newline (which would be included in the input) and deal with resynchronization when reading an over-length line (not very difficult).
  • fread() — will read exactly the right length, and would be a good choice if you think overlength and underlength lines will be vanishingly rare occurrences. Resynchronization after an error is anything but trivial, especially if you get adjacent erroneous lines.
  • getline() — from POSIX 2008. Allocates sufficient memory for the length of line it reads, which is a little wasteful if you're simply going to discard over-length lines.

Because they aren't suitable, you end up writing your own.

Now tested code. (The fix was needed in the first if, as diagnosed by Dave. The trouble was that I originally wrote the inverse condition (if ((c = getc(fp)) != EOF && c != '\n')), then got distracted after I inverted the logic, leading to an 'incomplete inversion' of the condition.)

The key parts of this are the two while loops.

The first while loop reads up to linelen characters, storing the data and counting characters — the normal operation. If the line is short, the loop is broken when the newline (or EOF) is read, and the count will indicate that. Note the < condition in the code below: if the line is the right length, the loop ends once linelen characters have been stored, and the newline that follows is consumed by the first getc() of the second loop without changing the count.

The second while loop deals with overlong lines, reading to the end of the line and discarding the results. It uses x instead of c because c is needed in the return statement.

/*
@(#)File:           $RCSfile: rdfixlen.c,v $
@(#)Version:        $Revision: 1.2 $
@(#)Last changed:   $Date: 2012/04/01 00:15:43 $
@(#)Purpose:        Read fixed-length line
@(#)Author:         J Leffler
*/

/* Inspired by https://stackoverflow.com/questions/9957006 */

#include <stdio.h>
#include <assert.h>

extern int read_fixed_length_line(FILE *fp, char *buffer, int linelen);

/*
 * Read one line that is expected to hold linelen characters plus a newline.
 *
 * fp      - stream to read from; must not be null.
 * buffer  - receives at most linelen characters followed by a NUL, so it
 *           must have room for linelen + 1 bytes.  The newline is not stored.
 * linelen - expected number of characters on the line; must be positive.
 *
 * Returns the length of the line that was read, excluding the newline.  For
 * an over-length line the excess characters are counted and discarded, so
 * the result exceeds linelen while buffer holds only the first linelen
 * characters.  Returns EOF only when end of file (or a read error) is met
 * before any character of a line has been read; a final line that lacks its
 * newline is still reported with its length instead of being dropped.
 */
int read_fixed_length_line(FILE *fp, char *buffer, int linelen)
{
    int count = 0;
    int c = 0;      /* initialised so it is never read indeterminate */

    assert(fp != 0 && buffer != 0 && linelen > 0);

    /* Normal operation: store characters until the line or buffer ends. */
    while (count < linelen)
    {
        if ((c = getc(fp)) == EOF || c == '\n')
            break;
        buffer[count++] = (char)c;
    }
    buffer[count] = '\0';

    if (c != EOF && c != '\n')
    {
        /*
         * The buffer is full and the line has not ended yet.  Gobble the
         * rest of the line, counting the surplus characters; for a line of
         * exactly linelen characters this reads just the newline.
         */
        while ((c = getc(fp)) != EOF && c != '\n')
            count++;
    }

    /* EOF is reported only when nothing at all was read for this line. */
    return (c == EOF && count == 0) ? EOF : count;
}

#ifdef TEST

#include "posixver.h"
#include <stdarg.h>
#include <unistd.h>
#include <string.h>

/*
 * Test driver: reads lines from standard input with
 * read_fixed_length_line(), expecting MAXLINELEN characters per line, and
 * prints one report per line tagged R (right length), S (short) or L (long).
 */
int main(void)
{
    enum { MAXLINELEN = 10 };
    char line[16];
    int lineno = 0;
    int actlen;

    /* Start from an all-NUL buffer so the checks below are meaningful. */
    memset(line, '\0', sizeof(line));

    for (;;)
    {
        actlen = read_fixed_length_line(stdin, line, MAXLINELEN);
        if (actlen == EOF)
            break;

        lineno++;
        if (actlen == MAXLINELEN)
            printf("%2d:R: length %2d <<%s>>\n", lineno, actlen, line);
        else if (actlen > MAXLINELEN)
            printf("%2d:L: length %2d <<%s>>\n", lineno, actlen, line);
        else
            printf("%2d:S: length %2d <<%s>>\n", lineno, actlen, line);

        /* The reader must never have written past the terminating NUL. */
        assert(line[MAXLINELEN] == '\0');
        assert(line[MAXLINELEN + 1] == '\0');
    }

    return 0;
}

#endif /* TEST */

Test data and output

$ cat xxx

abcdefghij
a
Abcdefghij
ab
aBcdefghij
abc
abCdefghij
abcd
abcDefghij
abcde
abcdEfghij
abcdef
abcdeFghij
abcdefg
abcdefGhij
abcdefgh
abcdefgHij
abcdefghi
abcdefghIj
abcdefghiJ
abcdefghiJ1
AbcdefghiJ
abcdefghiJ12
aBcdefghiJ
abcdefghiJ123
$ ./rdfixlen < xxx
 1:S: length  0 <<>>
 2:R: length 10 <<abcdefghij>>
 3:S: length  1 <<a>>
 4:R: length 10 <<Abcdefghij>>
 5:S: length  2 <<ab>>
 6:R: length 10 <<aBcdefghij>>
 7:S: length  3 <<abc>>
 8:R: length 10 <<abCdefghij>>
 9:S: length  4 <<abcd>>
10:R: length 10 <<abcDefghij>>
11:S: length  5 <<abcde>>
12:R: length 10 <<abcdEfghij>>
13:S: length  6 <<abcdef>>
14:R: length 10 <<abcdeFghij>>
15:S: length  7 <<abcdefg>>
16:R: length 10 <<abcdefGhij>>
17:S: length  8 <<abcdefgh>>
18:R: length 10 <<abcdefgHij>>
19:S: length  9 <<abcdefghi>>
20:R: length 10 <<abcdefghIj>>
21:R: length 10 <<abcdefghiJ>>
22:L: length 11 <<abcdefghiJ>>
23:R: length 10 <<AbcdefghiJ>>
24:L: length 12 <<abcdefghiJ>>
25:R: length 10 <<aBcdefghiJ>>
26:L: length 13 <<abcdefghiJ>>
$ 

Before you actually write the line-reading code there is one big problem you should fix:

  • Insufficient allocation: you need to allocate 100 for the line, 1 for the '\n' and 1 for the '\0'. This totals 102. Even though you are using fgets for safety, the smaller buffer prevents you from detecting a too-long line.

After that, proper processing is easy (commented heavily):

/*
 * Assumes `line` is a char array of 102 bytes (100 characters + '\n' + '\0')
 * and `file` is an open FILE * -- both are declared outside this fragment.
 */
char *end = line + sizeof line - 2; /* where '\n' sits on a correct line */
int ch, nlines = 0;
while (fgets(line, sizeof line, file)) {
    char *nl = strchr(line, '\n');
    nlines++;
    if (nl != end) { /* the line is too short or too long */
        if (nl == NULL) {
            /* No newline was stored: the line is too long (or is an
               unterminated last line), so eat input until the newline
               to resynchronize on the next line. */
            do { ch = getc(file); } while (ch != EOF && ch != '\n');
        }
        printf("Error on line %d\n", nlines);
        continue; /* skip line */
    }
    /* do whatever. */
}

The options for file-based access have been covered extensively by others. However if you have the mmap system call, there is another option. mmap maps the file into virtual memory and reads it in when you access it. It is very convenient and lets you treat the file as a single string.

Note that the file is mapped below using MAP_PRIVATE which means that changes to the string (file) are not written back to the real file. Using MAP_SHARED writes changes back to the file (not what is wanted here).

Here is some code to get you started. We are going to map the file then process it:

/* Map the file; only check its lines when the mapping succeeded. */
char *file = map_file(filename);

if (file != NULL) {
    read_equal_sized_lines(file, size);
}

First we map the file:

/*
 * Map the named file into memory as a private, writable copy.
 *
 * filename - path of the file to map.
 *
 * Returns a pointer to the start of the mapping, or NULL after printing a
 * diagnostic with perror() when the file cannot be opened, examined or
 * mapped.
 *
 * The mapping is exactly as long as the file; mmap() adds no terminating
 * NUL.  POSIX zero-fills only the remainder of the last page, so code that
 * treats the result as a C string relies on the file size not being an exact
 * multiple of the page size.
 *
 * Needs <sys/stat.h>, <sys/mman.h>, <fcntl.h>, <unistd.h> and <stdio.h>.
 */
#ifndef MAP_FILE
#define MAP_FILE 0 /* compatibility flag; not defined by every <sys/mman.h> */
#endif

static char * map_file(const char *filename)
{
    struct stat st;
    char *file = NULL;
    int fd = open(filename, O_RDONLY);

    if (fd < 0) {
        perror(filename);
        return NULL;
    }

    if (fstat(fd, &st) < 0) {
        perror("fstat");
    } else {
        /* MAP_PRIVATE: writes go to a private copy, never back to the file. */
        void *map = mmap(0, (size_t) st.st_size,
                         PROT_READ | PROT_WRITE,
                         MAP_FILE | MAP_PRIVATE, fd, 0);
        if (map == MAP_FAILED)
            perror("mmap");
        else
            file = map;
    }

    /* The mapping stays valid after close(); keeping fd open would leak it. */
    close(fd);
    return file;
}

Now we have a string and can manipulate it simply:

/*
 * Split the first line off the front of the string s.
 *
 * If s contains a newline, the newline is overwritten with a NUL so that s
 * becomes the NUL-terminated line, and the number of bytes up to and
 * including that newline is returned.  Otherwise s is left untouched and
 * its length is returned.  Either way, s plus the result is where the
 * following line (or the final NUL) starts.
 */
static size_t get_line(char *s)
{
    char *newline = strchr(s, '\n');

    if (newline == NULL)
        return strlen(s);

    *newline = '\0'; /* terminate the line */
    return (size_t) (newline + 1 - s);
}


/*
 * Walk the NUL-terminated text in file one line at a time and print
 * "Line N: ERROR" for every line whose get_line() result differs from size.
 *
 * file - text to scan; it is modified in place by get_line().
 * size - value each get_line() result is compared against.
 *        NOTE(review): the comparison uses get_line()'s return value as is,
 *        so whether size is meant to count the newline depends on what
 *        get_line() returns -- confirm against the caller.
 */
static void read_equal_sized_lines(char *file, size_t size)
{
    char *cursor = file;
    int line_nr;

    for (line_nr = 1; *cursor != '\0'; ++line_nr) {
        size_t consumed = get_line(cursor);

        /* cursor points to nul-terminated line; do what you want with it */
        if (consumed != size)
            printf("Line %d: ERROR\n", line_nr);

        cursor += consumed; /* step to the start of the next line */
    }
}

Try this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define maxLen 100

/*
 * Check that every line of dataFile.txt holds exactly maxLen characters
 * (newline excluded) and print the number of each line that does not.
 *
 * Returns 0 when the file was processed, EXIT_FAILURE when it could not be
 * opened.
 */
int main(void) {
    int lineNumber = 0;
    char buffer[2048];
    FILE *myFile = fopen("dataFile.txt", "r");

    if (myFile == NULL) {
        perror("dataFile.txt");
        return EXIT_FAILURE;
    }

    while (fgets(buffer, sizeof buffer, myFile) != NULL) {
        /* Length up to the newline; never indexes before the buffer and
           never chops a real character off an unterminated last line. */
        size_t len = strcspn(buffer, "\n");

        lineNumber++;
        if (buffer[len] != '\n') {
            /* The line did not fit (or is the unterminated last line):
               consume and count the rest so that one physical line is
               still counted as one line. */
            int ch;
            while ((ch = getc(myFile)) != EOF && ch != '\n') {
                len++;
            }
        }
        if (len != maxLen) {
            printf("Error in line: %d\n", lineNumber);
        }
    }

    fclose(myFile);
    return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM