简体   繁体   中英

How to get number of characters in a text file in C

I'm writing a c program that takes a text file name as an argument and prints its contents.

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/*
 * Print the beginning of a text file to standard output.
 *
 * The file name is taken from the first command-line argument; when no
 * argument is given, "source.txt" is used.  Only a single read() is issued,
 * so at most sizeof(buffer) - 1 bytes are printed.
 *
 * Returns 0 on success and 1 if the file cannot be opened or read.
 */
int main(int argc, char *argv[])
{
    const char *filepath = (argc > 1) ? argv[1] : "source.txt";
    int fd = open(filepath, O_RDONLY);
    if (fd == -1)
    {
        printf("\n open() failed with error [%s]\n", strerror(errno));
        return 1;
    }
    char buffer[1000];
    /* Leave one byte free so the data can be NUL-terminated for printf. */
    ssize_t count = read(fd, buffer, sizeof buffer - 1);
    if (count == -1)
    {
        printf("\n read() failed with error [%s]\n", strerror(errno));
        close(fd);
        return 1;
    }
    /* read() does not terminate the data; "%s" requires a terminator. */
    buffer[count] = '\0';
    printf("%s", buffer);
    close(fd);
    return 0;
}

I don't know how to get the number of characters in a text file, so I just set the array size to 1000. It seems to work, but how can I make this work for a text file with any number of characters? I'm only allowed to use the open, read, write, and close system calls.

在此处输入图像描述

Write a loop. In the loop, read 1000 bytes (or any number you find convenient) and pay attention to the return value of read, which is the number of bytes actually read, so you can add it to your total. If read returns 0, it reached the end of file, so you can stop. If it returns -1, there was an error (in errno) and you probably want to report it and exit.

The right answer is "you don't". If you check the size of the file before reading, you may risk that it changes before or during read. The right way is to read the file once, and only once, from beginning to end, and do whatever you want to do with the data while you are reading. (There are naturally exceptions to this "read once from beginning to end", but very unlikely for text files)

If you want to store the file in memory, you have to grow the buffer with realloc as you are reading from the file:

/*
 * Read the whole file at `path` into a heap-allocated, zero-terminated
 * buffer.
 *
 * The buffer is grown with realloc() as data arrives, so the file size does
 * not need to be known in advance.  The caller owns the returned pointer
 * and must free() it.
 *
 * On any failure (open, read or allocation) a message is printed with
 * perror() and the process exits with EXIT_FAILURE; the function never
 * returns NULL.
 */
char *read_file(const char *path)
{
  enum { CHUNK_SIZE = 1000 };
  char *file_buffer = NULL;
  size_t file_length = 0;
  int fd = open(path, O_RDONLY);
  if (fd == -1)
    {
      perror("Failed to open file");
      exit(EXIT_FAILURE);
    }
  while (1)
    {
      /*
         Make space for one more chunk plus the terminating zero, so the
         terminator always fits without another allocation at end of file
       */
      char *tmp = realloc(file_buffer, file_length + CHUNK_SIZE + 1);
      if (!tmp)
        {
          perror("Failed to allocate memory");
          exit(EXIT_FAILURE);
        }
      file_buffer = tmp;

      /* Read into buffer after existing content */
      ssize_t read_length = read(fd, file_buffer + file_length, CHUNK_SIZE);
      if (read_length == -1)
        {
          perror("Failed to read from file");
          exit(EXIT_FAILURE);
        }
      if (read_length == 0)
        {
          /* End of file is reached */
          break;
        }

      /* some bytes were successfully read, so update file_length */
      file_length += (size_t)read_length;
    }
  close(fd);

  /*
     Since this is a text-file, having a zero-terminated string is
     probably appropriate
   */
  file_buffer[file_length] = '\0';
  return file_buffer;
}

Naturally, if you only want to print the content of the buffer, there is no need for realloc (or malloc for that matter), just print out the content as you go:

/*
 * Copy the file at `path` to standard output using only
 * open/read/write/close.
 *
 * Returns the number of bytes written.  If the file cannot be opened or
 * read, or a write fails, a message is printed with perror() and the
 * process exits with EXIT_FAILURE.
 */
size_t print_file(const char *path)
{
  /* 
     We keep track of file length, just for the fun of it, and to have
     something to return
   */
  size_t file_length = 0;
  int fd = open(path, O_RDONLY);
  if (fd == -1)
    {
      perror("Failed to open file");
      exit(EXIT_FAILURE);
    }
  while (1)
    {
      /* Temporary scratch buffer */
      char buffer[1000];

      ssize_t read_length = read(fd, buffer, sizeof buffer);
      if (read_length == -1)
        {
          perror("Failed to read from file");
          exit(EXIT_FAILURE);
        }
      else if (read_length == 0)
        {
          /* 
             End of file is reached, and nothing more to do.
             We return file_length, just to return something
           */
          close(fd);
          return file_length;
        }
      else
        {
          /*
             some bytes were successfully read, so we print them;
             write() may accept fewer bytes than requested, so keep
             going until the whole chunk is out
           */
          char *b = buffer;
          while (read_length > 0)
            {
              /* Standard output is descriptor 1; 0 is standard input */
              ssize_t write_length = write(STDOUT_FILENO, b, (size_t)read_length);
              if (write_length == -1)
                {
                  perror("Failed to write to standard output");
                  exit(EXIT_FAILURE);
                }
              else if (write_length == 0)
                {
                  /* 
                     Nothing could be written and no error was reported.
                     Stop here rather than loop forever; handling this
                     better is left as an exercise for the reader
                   */
                  close(fd);
                  return file_length;
                }
              else
                {
                  file_length += (size_t)write_length;
                  b += write_length;
                  read_length -= write_length;
                }
            }
        }
    }
}

This code uses open / read / write to do IO. This is only because that is what the OP asked for. A better program would have used fopen / fread / fwrite or fopen / fgetc / fputc .

To my knowledge, there are two options:

  1. Read in steps, for example, 1000 characters at a time. Use fread .

  2. Determine the file size before opening the file with system call stat . Use the command man 2 stat for help.

STAT(2)                    Linux Programmer's Manual                   STAT(2)

NAME
       stat, fstat, lstat, fstatat - get file status

SYNOPSIS
       #include <sys/types.h>
       #include <sys/stat.h>
       #include <unistd.h>

       int stat(const char *pathname, struct stat *statbuf);
       int fstat(int fd, struct stat *statbuf);
       int lstat(const char *pathname, struct stat *statbuf);

       #include <fcntl.h>           /* Definition of AT_* constants */
       #include <sys/stat.h>

       int fstatat(int dirfd, const char *pathname, struct stat *statbuf,
                   int flags);

...

       struct stat {
           dev_t     st_dev;         /* ID of device containing file */
           ino_t     st_ino;         /* Inode number */
           mode_t    st_mode;        /* File type and mode */
           nlink_t   st_nlink;       /* Number of hard links */
           uid_t     st_uid;         /* User ID of owner */
           gid_t     st_gid;         /* Group ID of owner */
           dev_t     st_rdev;        /* Device ID (if special file) */
           off_t     st_size;        /* Total size, in bytes */
       //  ^^^^^^^^^^^^^^^^^^ Use this.

           blksize_t st_blksize;     /* Block size for filesystem I/O */
           blkcnt_t  st_blocks;      /* Number of 512B blocks allocated */

           /* Since Linux 2.6, the kernel supports nanosecond
              precision for the following timestamp fields.
              For the details before Linux 2.6, see NOTES. */

           struct timespec st_atim;  /* Time of last access */
           struct timespec st_mtim;  /* Time of last modification */
           struct timespec st_ctim;  /* Time of last status change */

       #define st_atime st_atim.tv_sec      /* Backward compatibility */
       #define st_mtime st_mtim.tv_sec
       #define st_ctime st_ctim.tv_sec
       };

...

Option 1 is more reliable, since with option 2 the file size may change between size determination and file reading.

// I got this from:
// https://www.geeksforgeeks.org/c-program-to-count-the-number-of-characters-in-a-file/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Count the characters in the file at `path`.
 *
 * The file is opened in text mode and read one character at a time until
 * EOF.  Returns the number of characters read.  If the file cannot be
 * opened, a message is printed to standard output and 0 is returned, so a
 * missing file and an empty file are indistinguishable to the caller.
 */
int numChars(char * path) {
    /* Open the caller's path directly; copying it into a fixed-size
       buffer would overflow on long paths. */
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        printf("Could not open file %s", path);
        return 0;
    }

    /* The counter must start at zero; reading it uninitialised is
       undefined behaviour. */
    int count = 0;
    while (getc(fp) != EOF) {
        count = count + 1;
    }

    fclose(fp);
    return count;
}

/*
 * Driver: counts the characters in "source.txt" and reports the total on
 * standard output.  The command-line arguments are not consulted.
 */
int main(int argc, char *argv[])
{
    char *filepath = "source.txt";
    int total = numChars(filepath);

    printf("\nThe number of chars in %s is:  %d\n", filepath, total);
    return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM