[英]Read a txt file with gets in C
我想知道什么是使用c中的gets函數讀取包含兩行數字的txt文件並在1秒內將它們保存在數組中的最佳選擇。
假設以下示例是名為ooo.txt的txt文件:第一行是數字2.000.000(即數組的大小),第二行包含2.000.000個數字,這些數字將存儲在數組中。
例如
2000000
59 595 45 492 89289 5 8959 (+1.999.993 numbers)
我嘗試的代碼(僅使用fscanf函數)
/*
 * Read the element count from the first number in `fp`, allocate an array of
 * that many ints, then fill it with the numbers that follow.
 * Every fscanf result is checked so a short or malformed file is reported
 * instead of leaving t_size or array elements uninitialised.
 */
int t_size;
if (fscanf(fp, "%d", &t_size) != 1 || t_size < 1) { /* first number is the array size, not data */
    printf("Error reading array size!\n");
    return 1; //return with failure
}
int *my_array = NULL;
my_array = malloc((size_t)t_size * sizeof(*my_array));
if (my_array == NULL) {
    printf("Error allocating memory!\n"); //print an error message
    return 1; //return with failure
}
int i = 0;
for (i = 0; i < t_size; i++) {
    /* &my_array[i] is the address of the element at index i */
    if (fscanf(fp, "%d", &my_array[i]) != 1) {
        printf("Error reading element %d!\n", i);
        free(my_array); /* do not leak the array on a failed read */
        return 1;
    }
}
到目前為止,最好的代碼是在1秒內完成該過程的代碼
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Return 1 when `input` points at the terminating NUL byte, otherwise 0. */
int is_end(char* input) {
    const char current = input[0];
    return current == '\0';
}
/*
 * Return 1 when `input` points at a separator character, otherwise 0.
 * Despite the name, a plain space counts as a separator alongside CR and LF.
 */
int is_linebreak(char* input) {
    switch (*input) {
    case '\r':
    case '\n':
    case ' ':
        return 1;
    default:
        return 0;
    }
}
/*
 * Skip a run of separator characters (as defined by is_linebreak) and return
 * a pointer to the first character that is not a separator. The terminating
 * NUL is not a separator, so the scan always stops at or before it.
 */
char* eat_linebreaks(char* input) {
    char *cursor;

    for (cursor = input; is_linebreak(cursor); cursor++) {
        /* nothing to do: the loop header advances past each separator */
    }
    return cursor;
}
/*
 * Count the pieces `input` splits into when cut at separator runs.
 *
 * Returns 0 for an empty string. Otherwise returns 1 plus the number of
 * separator runs, so a leading or trailing run contributes an extra
 * (empty) piece to the count.
 */
size_t count_lines(char* input) {
    size_t total;
    char *cursor = input;

    if (is_end(cursor)) {
        return 0;
    }

    for (total = 1; !is_end(cursor); ) {
        if (!is_linebreak(cursor)) {
            cursor++;
            continue;
        }
        /* A whole run of separators counts as a single boundary. */
        total++;
        cursor = eat_linebreaks(cursor);
    }
    return total;
}
/*
 * Split `input` into separately allocated, NUL-terminated pieces.
 *
 * Pieces are delimited by runs of separator characters (space, CR, LF).
 * A leading or trailing separator run yields an empty piece, matching what
 * count_lines reports for the same string.
 *
 * input      - NUL-terminated text to split; it is not modified.
 * line_count - capacity of the returned array. At most this many pieces are
 *              stored, so the array is never overrun even when the caller's
 *              count is too small (including 0 for an empty input).
 *
 * Returns a heap-allocated array of `line_count` pointers. Slots beyond the
 * last piece found are NULL. The caller frees every entry and then the array.
 * Returns NULL if `input` is NULL or an allocation fails; nothing is leaked
 * in that case.
 */
char** get_lines(char* input, size_t line_count) {
    static const char separators[] = " \r\n";
    const char *cursor = input;
    size_t line = 0;
    char **lines;

    if (input == NULL) {
        return NULL;
    }

    /* calloc checks the size multiplication and leaves unused slots NULL.
       Ask for at least one slot so success never looks like failure. */
    lines = calloc(line_count > 0 ? line_count : 1, sizeof *lines);
    if (lines == NULL) {
        return NULL;
    }

    while (line < line_count) {
        size_t length = strcspn(cursor, separators);
        char *copy = malloc(length + 1);

        if (copy == NULL) {
            while (line > 0) {
                free(lines[--line]);
            }
            free(lines);
            return NULL;
        }
        memcpy(copy, cursor, length);
        copy[length] = '\0';
        lines[line++] = copy;

        cursor += length;
        if (*cursor == '\0') {
            break;
        }
        /* Treat the whole separator run as a single boundary. */
        cursor += strspn(cursor, separators);
    }
    return lines;
}
/*
 * Load the file named by argv[1] into memory, split it into pieces, parse
 * the first piece as the element count and the following pieces as ints,
 * then print the elapsed CPU time.
 *
 * Returns 0 on success and 1 on any failure (missing argument, I/O error,
 * allocation failure, or a count that does not match the file contents).
 */
int main(int argc, char* argv[]) {
    clock_t start = clock();
    FILE *stream;
    char *contents = NULL;
    char **lines = NULL;
    int *my_array = NULL;
    size_t number_of_rows = 0;
    size_t size;
    size_t count;
    size_t i;
    long fileSize = -1;
    long parsed;
    char *end;
    float seconds;
    int status = 1;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <file>\n", argc > 0 ? argv[0] : "program");
        return 1;
    }

    // Open file, find the size of it
    stream = fopen(argv[1], "rb");
    if (stream == NULL) {
        perror(argv[1]);
        return 1;
    }
    if (fseek(stream, 0L, SEEK_END) == 0) {
        fileSize = ftell(stream); /* ftell returns long; -1 signals failure */
    }
    if (fileSize < 0 || fseek(stream, 0L, SEEK_SET) != 0) {
        perror(argv[1]);
        fclose(stream);
        return 1;
    }

    // Allocate space for the entire file content plus the terminating NUL
    contents = malloc((size_t)fileSize + 1);
    if (contents == NULL) {
        fprintf(stderr, "Error allocating memory!\n");
        fclose(stream);
        return 1;
    }

    // Stream file into memory
    size = fread(contents, 1, (size_t)fileSize, stream);
    contents[size] = 0;
    if (ferror(stream)) {
        perror(argv[1]);
        fclose(stream);
        goto cleanup;
    }
    fclose(stream);

    // Count rows in content; an empty file has nothing to parse
    number_of_rows = count_lines(contents);
    if (number_of_rows == 0) {
        fprintf(stderr, "Input file is empty\n");
        goto cleanup;
    }

    // Get array of char*, one for each piece
    lines = get_lines(contents, number_of_rows);
    if (lines == NULL || lines[0] == NULL) {
        fprintf(stderr, "Error allocating memory!\n");
        goto cleanup;
    }

    // First piece holds the count. It must be a clean non-negative number
    // and must not claim more elements than the file actually contains,
    // otherwise lines[i + 1] below would be read out of bounds.
    parsed = strtol(lines[0], &end, 10);
    if (end == lines[0] || *end != '\0' || parsed < 0 ||
        (size_t)parsed > number_of_rows - 1) {
        fprintf(stderr, "Invalid element count in input\n");
        goto cleanup;
    }
    count = (size_t)parsed;

    if (count > (size_t)-1 / sizeof *my_array) {
        fprintf(stderr, "Error allocating memory!\n");
        goto cleanup;
    }
    /* Request at least one element so count == 0 is not mistaken for OOM. */
    my_array = malloc((count > 0 ? count : 1) * sizeof *my_array);
    if (my_array == NULL) {
        fprintf(stderr, "Error allocating memory!\n");
        goto cleanup;
    }

    // Get the numbers out of the remaining pieces
    for (i = 0; i < count; ++i) {
        if (lines[i + 1] == NULL) {
            fprintf(stderr, "Missing element in input\n");
            goto cleanup;
        }
        my_array[i] = (int)strtol(lines[i + 1], NULL, 10);
    }

    /* clock() ticks are CLOCKS_PER_SEC per second, not always microseconds. */
    seconds = (float)(clock() - start) / CLOCKS_PER_SEC;
    printf("Took %fs", seconds);
    status = 0;

cleanup:
    if (lines != NULL) {
        for (i = 0; i < number_of_rows; ++i) {
            free(lines[i]);
        }
        free(lines);
    }
    free(my_array);
    free(contents);
    return status;
}
首先,您將想使用fgets
來避免危險的緩沖區溢出。 其次,您要刪除數字中的所有標點符號。 因此2.000.000變為2000000。然后可以使用指針和strtol
函數將字符轉換為整數; 還有其他函數可以轉換為float
和其他類型。
由於代碼追求速度,而IO通常是瓶頸,先用fstat()
找到文件的長度、再一次性讀入整個文件的做法( @Charlon 的建議)有一定的道理。 以下是對該緩沖區的快速解析。
// Stream file into memory
size = fread(contents, 1, fileSize, stream);
contents[size] = 0;
fclose(stream);
#if 1
// new code: parse the numbers straight out of the in-memory buffer,
// without first copying every token into its own string.
size_t array_n;
int n;
// %n records how many characters the count consumed, so parsing of the
// elements can resume right after it.
if (sscanf(contents, "%zu%n", &array_n, &n) != 1) Handle_BadInput();
// Refuse counts whose byte size would wrap around in the multiplication.
if (array_n > (size_t)-1 / sizeof *my_array) Handle_OOM();
my_array = malloc(array_n * sizeof *my_array);
// malloc(0) may legitimately return NULL; only a non-empty request can be OOM.
if (my_array == NULL && array_n > 0) Handle_OOM();
char *p = &contents[n];
char *endptr;
for (size_t count = 0; count < array_n; count++) {
    errno = 0;
    long value = strtol(p, &endptr, 10);
    my_array[count] = (int)value;
    // p == endptr: no digits found; errno: out of range for long;
    // the INT_MIN/INT_MAX test catches values that do not fit in int.
    if (p == endptr || errno || value < INT_MIN || value > INT_MAX)
        Handle_BadInput();
    p = endptr;
}
char ch;
// Anything other than trailing white-space after the last number is an error.
if (sscanf(p, " %c", &ch) == 1) Handle_ExtraInput();
#else
//old code
// Count rows in content
number_of_rows = count_lines(contents);
// Get array of char*, one for each line
lines = get_lines(contents, number_of_rows);
// Get the numbers out of the lines
count = atoi(lines[0]); // First row has count
my_array = (int*)malloc(count * sizeof(int));
for (i = 0; i < count; ++i) {
my_array[i] = atoi(lines[i + 1]);
}
#endif
不過,我仍然更喜歡一次讀取一個數字的可伸縮方法。
最快的方法需要大量RAM:
1)打開文件(man open)
2)使用fstat函數獲取文件的大小(man fstat)
3)用malloc分配一個大小恰好等於步驟2)所得文件大小的緩沖區,並將文件讀入其中(man malloc)
4)關閉文件(man close)
5)解析緩沖區,並將每個數字塊(每次讀到' '或'\0'為止)轉換為int
編輯:如果您的RAM不夠大,則需要創建一個get_next_int函數,該函數每次只把文件中的下一個數字讀入緩沖區
編輯2:您可以先讀取到知道需要存儲的int數量為止,將該數量乘以安全系數后與RAM的大小進行比較,然后選擇合適的方法,這樣程序就不會因內存不足而將errno設置為ENOMEM(你懂我的意思 ;))
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.