[英]How to get the number of characters in a file (not bytes) in C on Linux
[英]How to get number of characters in a text file in C
我正在编写一个 c 程序,该程序将文本文件名作为参数并打印其内容。
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
/*
 * Print the beginning of "source.txt" to standard output.
 *
 * A single read() of at most sizeof buffer bytes is performed, so only
 * the first 1000 bytes of the file are printed.
 *
 * Returns 0 on success, 1 if the file cannot be opened or read.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    const char *filepath = "source.txt";
    int fd = open(filepath, O_RDONLY);
    if (fd == -1)
    {
        printf("\n open() failed with error [%s]\n", strerror(errno));
        return 1;
    }

    char buffer[1000];
    ssize_t bytes_read = read(fd, buffer, sizeof buffer);
    if (bytes_read == -1)
    {
        printf("\n read() failed with error [%s]\n", strerror(errno));
        close(fd);
        return 1;
    }

    /*
     * read() does not NUL-terminate the buffer, so "%s" would run past the
     * data that was actually read. Print exactly bytes_read bytes instead.
     */
    printf("%.*s", (int)bytes_read, buffer);

    close(fd);
    return 0;
}
我不知道如何获取文本文件中的字符数,所以我只是将数组大小设置为 1000。它似乎可以工作,但是我怎样才能使它适用于具有任意数量字符的文本文件呢? 而且我只被允许使用 open、read、write、close 系统调用。
写一个循环。在循环中,读取 1000 个字节(或任何您认为方便的数量),并注意 read 的返回值,即实际读取的字节数,这样您就可以将其累加到总数中。如果 read 返回 0,说明已到达文件末尾,可以停止。如果它返回 -1,则表示出错(错误码在 errno 中),您可能需要报告错误并退出。
正确的答案是“你没有”。 如果您在读取前检查文件的大小,您可能会冒在读取前或读取期间文件更改的风险。 正确的方法是从头到尾读取文件一次,并且只读取一次,并在读取时对数据执行任何您想要执行的操作。 (这种“从头到尾读取一次”自然有例外,但对于文本文件不太可能)
如果要将文件存储在内存中,则必须在读取文件的过程中使用 realloc 增大缓冲区:
/*
 * Read the entire file at `path` into a heap buffer.
 *
 * The file is read once, front to back, in fixed-size chunks; the buffer is
 * grown with realloc() before each read. The returned buffer is
 * NUL-terminated so it can be used as a C string.
 *
 * Returns a pointer the caller must free(). On any failure (open, read or
 * allocation) an error is reported with perror() and the process exits with
 * EXIT_FAILURE, so the function never returns NULL.
 */
char *read_file(const char *path)
{
    enum { CHUNK_SIZE = 1000 };

    char *file_buffer = NULL;
    size_t file_length = 0;

    int fd = open(path, O_RDONLY);
    if (fd == -1)
    {
        perror("Failed to open file");
        exit(EXIT_FAILURE);
    }

    while (1)
    {
        /*
         * Make room for one more chunk plus the terminating NUL, so the
         * terminator always fits once end of file is reached.
         */
        char *tmp = realloc(file_buffer, file_length + CHUNK_SIZE + 1);
        if (!tmp)
        {
            perror("Failed to allocate memory");
            exit(EXIT_FAILURE);
        }
        file_buffer = tmp;

        /* Read into the buffer after the existing content */
        ssize_t read_length = read(fd, file_buffer + file_length, CHUNK_SIZE);
        if (read_length == -1)
        {
            perror("Failed to read from file");
            exit(EXIT_FAILURE);
        }
        if (read_length == 0)
        {
            /* End of file */
            break;
        }

        /* Some bytes were successfully read, so update file_length */
        file_length += (size_t)read_length;
    }

    close(fd);

    /* This is a text file, so hand back a zero-terminated string */
    file_buffer[file_length] = '\0';

    /*
     * Trim the unused tail. If shrinking fails the larger buffer is still
     * valid and correctly terminated, so return that instead.
     */
    char *trimmed = realloc(file_buffer, file_length + 1);
    return trimmed ? trimmed : file_buffer;
}
当然,如果您只想打印文件的内容,则不需要 realloc(或 malloc),只需边读取边打印内容即可:
/*
 * Copy the file at `path` to standard output.
 *
 * The file is read in fixed-size chunks into a scratch buffer and each
 * chunk is written out in full, retrying after short writes.
 *
 * Returns the number of bytes written. If the file cannot be opened or
 * read, or a write fails, an error is reported with perror() and the
 * process exits with EXIT_FAILURE.
 */
size_t print_file(const char *path)
{
    /* Running total of bytes written, so there is something to return */
    size_t file_length = 0;

    int fd = open(path, O_RDONLY);
    if (fd == -1)
    {
        perror("Failed to open file");
        exit(EXIT_FAILURE);
    }

    while (1)
    {
        /* Temporary scratch buffer */
        char buffer[1000];
        ssize_t read_length = read(fd, buffer, sizeof buffer);
        if (read_length == -1)
        {
            perror("Failed to read from file");
            exit(EXIT_FAILURE);
        }
        else if (read_length == 0)
        {
            /* End of file is reached, and nothing more to do */
            close(fd);
            return file_length;
        }

        /* Some bytes were successfully read, so we print them */
        char *b = buffer;
        while (read_length)
        {
            /*
             * Write to standard output (descriptor 1), not descriptor 0,
             * which is standard input. write() may accept fewer bytes than
             * requested, hence the loop.
             */
            ssize_t write_length = write(STDOUT_FILENO, b, (size_t)read_length);
            if (write_length == -1)
            {
                perror("Failed to write to file");
                exit(EXIT_FAILURE);
            }
            else if (write_length == 0)
            {
                /*
                 * No progress was made. Stop rather than spin forever;
                 * handling this better is left as an exercise for the
                 * reader.
                 */
                close(fd);
                return file_length;
            }

            file_length += (size_t)write_length;
            b += write_length;
            read_length -= write_length;
        }
    }
}
此代码使用 open / read / write 来执行 IO,这只是因为 OP 如此要求。更好的程序会使用 fopen / fread / fwrite 或 fopen / fgetc / fputc。
据我所知,有两种选择:
1. 分步读取,例如一次读取 1000 个字符。使用 fread。
2. 在打开文件之前,使用系统调用 stat 确定文件大小。可以使用命令 man 2 stat 查看帮助。
STAT(2) Linux Programmer's Manual STAT(2)
NAME
stat, fstat, lstat, fstatat - get file status
SYNOPSIS
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
int stat(const char *pathname, struct stat *statbuf);
int fstat(int fd, struct stat *statbuf);
int lstat(const char *pathname, struct stat *statbuf);
#include <fcntl.h> /* Definition of AT_* constants */
#include <sys/stat.h>
int fstatat(int dirfd, const char *pathname, struct stat *statbuf,
int flags);
...
struct stat {
dev_t st_dev; /* ID of device containing file */
ino_t st_ino; /* Inode number */
mode_t st_mode; /* File type and mode */
nlink_t st_nlink; /* Number of hard links */
uid_t st_uid; /* User ID of owner */
gid_t st_gid; /* Group ID of owner */
dev_t st_rdev; /* Device ID (if special file) */
off_t st_size; /* Total size, in bytes */
// ^^^^^^^^^^^^^^^^^^ Use this.
blksize_t st_blksize; /* Block size for filesystem I/O */
blkcnt_t st_blocks; /* Number of 512B blocks allocated */
/* Since Linux 2.6, the kernel supports nanosecond
precision for the following timestamp fields.
For the details before Linux 2.6, see NOTES. */
struct timespec st_atim; /* Time of last access */
struct timespec st_mtim; /* Time of last modification */
struct timespec st_ctim; /* Time of last status change */
#define st_atime st_atim.tv_sec /* Backward compatibility */
#define st_mtime st_mtim.tv_sec
#define st_ctime st_ctim.tv_sec
};
...
选项 1 更可靠,因为使用选项 2,文件大小可能会在大小确定和文件读取之间发生变化。
// I got this from:
// https://www.geeksforgeeks.org/c-program-to-count-the-number-of-characters-in-a-file/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Count the characters in the file at `path`.
 *
 * The file is opened in text mode and read one character at a time with
 * getc() until EOF. On this byte-oriented stream each char returned counts
 * as one character, so multi-byte encodings are counted per byte.
 *
 * Returns the number of characters read, or 0 if the file cannot be opened
 * (in which case a message is printed to standard output).
 */
int numChars(char * path) {
    /* Open the caller's path directly; copying it into a fixed-size
       array would overflow on long paths. */
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        printf("Could not open file %s", path);
        return 0;
    }

    /* The counter must start at zero; reading it uninitialised is
       undefined behaviour. */
    int count = 0;
    while (getc(fp) != EOF) {
        count++;
    }

    fclose(fp);
    return count;
}
/*
 * Entry point: count the characters in "source.txt" with numChars() and
 * print the result. Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    char *filepath = "source.txt";
    int total = numChars(filepath);

    printf("\nThe number of chars in %s is: %d\n", filepath, total);
    return 0;
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.