繁体   English   中英

如何在 C 中获取文本文件中的字符数

[英]How to get number of characters in a text file in C

我正在编写一个 c 程序,该程序将文本文件名作为参数并打印其内容。

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/*
 * Print the first bytes of "source.txt" to standard output.
 *
 * Only a single read() of at most sizeof buffer bytes is performed, so
 * longer files are truncated.  Returns 0 on success and 1 when the file
 * cannot be opened or read.
 */
int main(int argc, char *argv[])
{
    char *filepath = "source.txt";
    int fd = open(filepath, O_RDONLY);
    if (fd == -1)
    {
        printf("\n open() failed with error [%s]\n", strerror(errno));
        return 1;
    }
    char buffer[1000];
    /* read() reports how many bytes it actually stored; it never adds a '\0'. */
    ssize_t bytes_read = read(fd, buffer, sizeof buffer);
    if (bytes_read == -1)
    {
        printf("\n read() failed with error [%s]\n", strerror(errno));
        close(fd);
        return 1;
    }
    /* Bound the output by the byte count instead of relying on a terminator. */
    printf("%.*s", (int)bytes_read, buffer);
    close(fd);
    return 0;
}

我不知道如何获取文本文件中的字符数,所以我只是将数组大小设置为 1000。它似乎可以工作,但是我怎样才能使它适用于具有任意数量字符的文本文件呢? 而且我只被允许使用 open、read、write、close 系统调用。

在此处输入图像描述

写一个循环。 在循环中,读取 1000 个字节(或任何您认为方便的数字)并注意read的返回值,即实际读取的字节数,因此您可以将其添加到总数中。 如果read返回 0,它到达了文件的末尾,所以你可以停止。 如果它返回 -1,则存在错误(在errno中),您可能想要报告它并退出。

正确的答案是“你不需要获取它”。 如果您在读取前检查文件的大小,您可能会冒在读取前或读取期间文件更改的风险。 正确的方法是从头到尾读取文件一次,并且只读取一次,并在读取时对数据执行任何您想要执行的操作。 (这种“从头到尾读取一次”自然有例外,但对于文本文件不太可能)

如果要将文件存储在内存中,则必须在读取文件时使用 realloc 增大缓冲区:

/*
 * Read the whole file at `path` into a freshly allocated, zero-terminated
 * buffer.
 *
 * The buffer grows with realloc() while the file is read in fixed-size
 * chunks, so the file size never has to be known in advance.
 *
 * Returns a pointer the caller must release with free().  On any open,
 * read or allocation failure a message is printed with perror() and the
 * process exits with EXIT_FAILURE, so NULL is never returned.
 */
char *read_file(const char *path)
{
  enum { CHUNK_SIZE = 1000 };
  char *file_buffer = NULL;
  size_t file_length = 0;
  int fd = open(path, O_RDONLY);
  if (fd == -1)
    {
      perror("Failed to open file");
      exit(EXIT_FAILURE);
    }
  while (1)
    {
      /* Make space for one more chunk plus the final terminator */
      char *tmp = realloc(file_buffer, file_length + CHUNK_SIZE + 1);
      if (!tmp)
        {
          perror("Failed to allocate memory");
          exit(EXIT_FAILURE);
        }
      file_buffer = tmp;

      /* Read into buffer after existing content */
      ssize_t read_length = read(fd, file_buffer + file_length, CHUNK_SIZE);
      if (read_length == -1)
        {
          perror("Failed to read from file");
          exit(EXIT_FAILURE);
        }
      if (read_length == 0)
        {
          /* End of file is reached */
          break;
        }
      /* some bytes were successfully read, so update file_length */
      file_length += (size_t)read_length;
    }
  close(fd);

  /*
     Since this is a text-file, having a zero-terminated string is
     probably appropriate; the extra byte was reserved above.
   */
  file_buffer[file_length] = 0;

  /* Give back the unused tail; the larger buffer stays valid if this fails */
  char *trimmed = realloc(file_buffer, file_length + 1);
  return trimmed ? trimmed : file_buffer;
}

当然,如果您只想打印缓冲区的内容,则不需要 realloc (或 malloc ),只需边读取边打印内容即可:

/*
 * Copy the file at `path` to standard output using only
 * open/read/write/close.
 *
 * Returns the number of bytes written.  On an open, read or write error
 * a message is printed with perror() and the process exits with
 * EXIT_FAILURE.  If write() reports that nothing could be written, the
 * copy stops early and the count so far is returned.
 */
size_t print_file(const char *path)
{
  /* 
     We keep track of file length, just for the fun of it, and to have
     something to return
   */
  size_t file_length = 0;
  int fd = open(path, O_RDONLY);
  if (fd == -1)
    {
      perror("Failed to open file");
      exit(EXIT_FAILURE);
    }
  while (1)
    {
      /* Temporary scratch buffer */
      char buffer[1000];

      ssize_t read_length = read(fd, buffer, sizeof buffer);
      if (read_length == -1)
        {
          perror("Failed to read from file");
          exit(EXIT_FAILURE);
        }
      else if (read_length == 0)
        {
          /* 
             End of file is reached, and nothing more to do.
             We return file_length, just to return something
           */
          close(fd);
          return file_length;
        }
      else
        {
          /*
             some bytes were successfully read, so we print them;
             write() may accept fewer bytes than requested, hence the loop
           */
          char *b = buffer;
          while (read_length)
            {
              /* Standard output is descriptor 1; descriptor 0 is standard input */
              ssize_t write_length = write(STDOUT_FILENO, b, (size_t)read_length);
              if (write_length == -1)
                {
                  perror("Failed to write to standard output");
                  exit(EXIT_FAILURE);
                }
              else if (write_length == 0)
                {
                  /* 
                     Nothing could be written to standard output.
                     Handling this better is left as an exercise 
                     for the reader
                   */
                  close(fd);
                  return file_length;
                }
              else
                {
                  file_length += (size_t)write_length;
                  b += write_length;
                  read_length -= write_length;
                }
            }
        }
    }
}

此代码使用 open / read / write 来执行 IO。 这只是因为这是 OP 所要求的。 更好的程序会使用 fopen / fread / fwrite 或 fopen / fgetc / fputc

据我所知,有两种选择:

  1. 分步阅读,例如,一次 1000 个字符。 使用fread

  2. 在使用系统调用stat打开文件之前确定文件大小。 使用命令man 2 stat寻求帮助。

STAT(2)                    Linux Programmer's Manual                   STAT(2)

NAME
       stat, fstat, lstat, fstatat - get file status

SYNOPSIS
       #include <sys/types.h>
       #include <sys/stat.h>
       #include <unistd.h>

       int stat(const char *pathname, struct stat *statbuf);
       int fstat(int fd, struct stat *statbuf);
       int lstat(const char *pathname, struct stat *statbuf);

       #include <fcntl.h>           /* Definition of AT_* constants */
       #include <sys/stat.h>

       int fstatat(int dirfd, const char *pathname, struct stat *statbuf,
                   int flags);

...

       struct stat {
           dev_t     st_dev;         /* ID of device containing file */
           ino_t     st_ino;         /* Inode number */
           mode_t    st_mode;        /* File type and mode */
           nlink_t   st_nlink;       /* Number of hard links */
           uid_t     st_uid;         /* User ID of owner */
           gid_t     st_gid;         /* Group ID of owner */
           dev_t     st_rdev;        /* Device ID (if special file) */
           off_t     st_size;        /* Total size, in bytes */
       //  ^^^^^^^^^^^^^^^^^^ Use this.

           blksize_t st_blksize;     /* Block size for filesystem I/O */
           blkcnt_t  st_blocks;      /* Number of 512B blocks allocated */

           /* Since Linux 2.6, the kernel supports nanosecond
              precision for the following timestamp fields.
              For the details before Linux 2.6, see NOTES. */

           struct timespec st_atim;  /* Time of last access */
           struct timespec st_mtim;  /* Time of last modification */
           struct timespec st_ctim;  /* Time of last status change */

       #define st_atime st_atim.tv_sec      /* Backward compatibility */
       #define st_mtime st_mtim.tv_sec
       #define st_ctime st_ctim.tv_sec
       };

...

选项 1 更可靠,因为使用选项 2,文件大小可能会在大小确定和文件读取之间发生变化。

// I got this from:
// https://www.geeksforgeeks.org/c-program-to-count-the-number-of-characters-in-a-file/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Count the characters in the file at `path`.
 *
 * Returns the number of characters returned by getc() before EOF.
 * If the file cannot be opened, a message is printed to standard output
 * and 0 is returned, which is indistinguishable from an empty file.
 */
int numChars(char * path) {
    /* Open the caller's path directly; copying it into a fixed-size
       array would overflow on long paths. */
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        printf("Could not open file %s", path);
        return 0;
    }

    /* The counter must start at zero; reading it uninitialised is undefined. */
    int count = 0;
    while (getc(fp) != EOF) {
        count = count + 1;
    }

    fclose(fp);
    return count;
}

/* Report how many characters the fixed input file "source.txt" contains. */
int main(int argc, char *argv[])
{
    char *target = "source.txt";
    /* Count first so the result can be reported in a single message. */
    int total = numChars(target);

    printf("\nThe number of chars in %s is:  %d\n", target, total);
    return 0;
}

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM