簡體   English   中英

在C語言中使用gets讀取txt文件

[英]Read a txt file with gets in C

我想知道什么是使用c中的gets函數讀取包含兩行數字的txt文件並在1秒內將它們保存在數組中的最佳選擇。

假設以下示例是名為ooo.txt的txt文件:第一行是數字2.000.000(即數組的大小),第二行包含2.000.000個數字,這些數字將存儲在數組中。

例如

2000000
59 595 45 492 89289 5 8959 (+1.999.993 numbers)

我嘗試的代碼(僅使用fscanf函數)

int t_size;
fscanf(fp, "%d",&t_size);  //bypass the first character!




        int* my_array = NULL; 
        my_array = malloc(t_size*sizeof(*my_array));


        if (my_array==NULL) {
        printf("Error allocating memory!\n"); //print an error message
        return 1; //return with failure
        getchar();
        }

        int i =0;
        for ( i = 0; i < t_size; i++ )
        {
        fscanf(fp, "%d",&my_array[i]);  /*p[i] is the content of element at index i and &p[i] is the address of element at index i */
        }

到目前為止,最好的代碼是在1秒內完成該過程的代碼

    #include <assert.h>
    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    /* Returns 1 when input points at the string's terminating NUL, otherwise 0. */
    int is_end(char* input) {
        return input[0] == '\0';
    }

    /*
     * Returns 1 when input points at a separator character, otherwise 0.
     * Besides CR and LF, a plain space is also treated as a separator.
     */
    int is_linebreak(char* input) {
        switch (*input) {
        case '\r':
        case '\n':
        case ' ':
            return 1;
        default:
            return 0;
        }
    }

    /*
     * Skips the run of separator characters (as defined by is_linebreak)
     * starting at input and returns a pointer to the first character after it.
     * Returns input unchanged when it does not start with a separator.
     */
    char* eat_linebreaks(char* input) {
        char* cursor;

        for (cursor = input; is_linebreak(cursor); ++cursor) {
            /* nothing to do: the loop header advances past each separator */
        }

        return cursor;
    }

    /*
     * Counts the entries that splitting input on separator runs produces.
     * An empty string yields 0; otherwise the result is 1 plus the number of
     * separator runs, so leading or trailing separators each add an entry.
     */
    size_t count_lines(char* input) {
        char* cursor = input;
        size_t total;

        if (is_end(cursor))
            return 0;

        total = 1;
        while (!is_end(cursor)) {
            if (!is_linebreak(cursor)) {
                ++cursor;
                continue;
            }
            /* A whole run of separators counts as a single break. */
            ++total;
            cursor = eat_linebreaks(cursor);
        }
        return total;
    }

    /*
     * Splits input into separately allocated, NUL-terminated copies of its
     * tokens. A token is a maximal run of characters for which is_linebreak()
     * is false; a run of separators counts as one break. Input that starts or
     * ends with a separator yields an empty first or last entry, which matches
     * the number returned by count_lines().
     *
     * input      - NUL-terminated text to split (not modified).
     * line_count - number of entries the caller expects, normally the result
     *              of count_lines(input). At most this many entries are
     *              stored; a value of 0 is treated as 1 so that lines[0] is
     *              always valid (it is the empty string for empty input).
     *
     * Returns an array of max(line_count, 1) pointers; entries that were not
     * filled are NULL. The caller owns the array and every non-NULL entry and
     * releases them with free(). Returns NULL if any allocation fails.
     */
    char** get_lines(char* input, size_t line_count) {
        size_t capacity = line_count > 0 ? line_count : 1;
        /* calloc checks capacity * sizeof for overflow and leaves unused slots NULL */
        char** lines = calloc(capacity, sizeof *lines);
        char* from = input;
        size_t line = 0;

        if (lines == NULL)
            return NULL;

        while (line < capacity) {
            char* p = from;
            size_t length;

            /* Find the end of the current token. */
            while (!is_end(p) && !is_linebreak(p))
                ++p;
            length = (size_t)(p - from);

            lines[line] = malloc(length + 1);
            if (lines[line] == NULL) {
                /* Undo the partial result so nothing leaks. */
                while (line > 0)
                    free(lines[--line]);
                free(lines);
                return NULL;
            }
            memcpy(lines[line], from, length);
            lines[line][length] = 0;
            ++line;

            if (is_end(p))
                break;

            /* Skip the separator run; if it reaches the end of the string the
               next iteration stores the trailing empty entry. */
            from = eat_linebreaks(p);
        }

        return lines;
    }

    /*
     * Parses a base-10 int at the start of text.
     * Returns 1 and stores the value in *out on success. Returns 0 when text
     * does not start with a number or the value does not fit in an int.
     * Characters following the number are ignored, as atoi() would.
     */
    static int parse_int(const char* text, int* out) {
        char* end;
        long value;

        errno = 0;
        value = strtol(text, &end, 10);
        if (end == text || errno == ERANGE || value < INT_MIN || value > INT_MAX)
            return 0;

        *out = (int)value;
        return 1;
    }

    /*
     * Loads the file named by argv[1] into memory, splits it into tokens,
     * reads the element count from the first token and converts that many of
     * the following tokens into an int array, then prints the CPU time used.
     *
     * Returns 0 on success and 1 on any usage, I/O, allocation or input error
     * (a diagnostic is written to stderr).
     */
    int main(int argc, char* argv[]) {
        clock_t start = clock();
        int status = 1;
        char* contents = NULL;
        char** lines = NULL;
        int* my_array = NULL;
        size_t number_of_rows = 0;
        size_t size;
        size_t i;
        int count = 0;
        long file_size = -1;
        double seconds;
        FILE* stream;

        if (argc < 2) {
            fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "program");
            return 1;
        }

        // Open file, find the size of it
        stream = fopen(argv[1], "rb");
        if (stream == NULL) {
            perror(argv[1]);
            return 1;
        }
        if (fseek(stream, 0L, SEEK_END) == 0)
            file_size = ftell(stream);
        if (file_size < 0 || fseek(stream, 0L, SEEK_SET) != 0) {
            perror(argv[1]);
            fclose(stream);
            return 1;
        }

        // Allocate space for the entire file content plus the terminating NUL
        contents = malloc((size_t)file_size + 1);
        if (contents == NULL) {
            fprintf(stderr, "Error allocating memory!\n");
            fclose(stream);
            return 1;
        }

        // Stream file into memory; terminate after the bytes actually read
        size = fread(contents, 1, (size_t)file_size, stream);
        contents[size] = 0;
        fclose(stream);

        // Count rows in content
        number_of_rows = count_lines(contents);
        if (number_of_rows == 0) {
            fprintf(stderr, "%s: file is empty\n", argv[1]);
            goto cleanup;
        }

        // Get array of char*, one for each token
        lines = get_lines(contents, number_of_rows);
        if (lines == NULL) {
            fprintf(stderr, "Error allocating memory!\n");
            goto cleanup;
        }

        // First token holds the count; it must not promise more values than
        // there are tokens after it, otherwise lines[i + 1] would be out of bounds
        if (!parse_int(lines[0], &count) || count < 0 ||
            (size_t)count > number_of_rows - 1) {
            fprintf(stderr, "%s: bad or inconsistent element count\n", argv[1]);
            goto cleanup;
        }

        my_array = malloc((size_t)count * sizeof *my_array);
        if (my_array == NULL && count > 0) {
            fprintf(stderr, "Error allocating memory!\n");
            goto cleanup;
        }

        // Get the numbers out of the remaining tokens
        for (i = 0; i < (size_t)count; ++i) {
            if (!parse_int(lines[i + 1], &my_array[i])) {
                fprintf(stderr, "%s: element %zu is not a valid int\n", argv[1], i);
                goto cleanup;
            }
        }

        // clock() ticks are CLOCKS_PER_SEC per second, which is not 1000000 everywhere
        seconds = (double)(clock() - start) / CLOCKS_PER_SEC;
        printf("Took %fs", seconds);
        status = 0;

    cleanup:
        free(my_array);
        if (lines != NULL) {
            for (i = 0; i < number_of_rows; ++i)
                free(lines[i]);
            free(lines);
        }
        free(contents);
        return status;
    }

首先,您將想使用fgets來避免危險的緩沖區溢出。 其次,您要刪除數字中的所有標點符號。 因此2.000.000變為2000000。然后可以使用指針和strtol函數將字符轉換為整數; 還有其他函數可以轉換為float和其他類型。

由於代碼追求速度,而IO是典型的瓶頸,先用fstat()獲取文件長度( @Charlon ),再一次性讀取整個文件是有道理的。 以下是對該緩沖區的快速解析。

    // Stream file into memory
    size = fread(contents, 1, fileSize, stream);
    contents[size] = 0; 
    fclose(stream);

    #if 1
    // new code: parse the numbers straight out of the buffer, no per-token copies

    // The first token is the element count; %n records how many characters
    // were consumed so that parsing can continue right after it.
    size_t array_n;
    int n;
    if (sscanf(contents, "%zu%n", &array_n, &n) != 1) Handle_BadInput();

    // Refuse counts whose byte size would wrap around size_t.
    if (array_n > (size_t)-1 / sizeof *my_array) Handle_BadInput();

    my_array = malloc(array_n * sizeof *my_array);
    // malloc(0) may return NULL without being out of memory
    if (my_array == NULL && array_n > 0) Handle_OOM();

    // NOTE(review): the Handle_* functions are not shown here; this code
    // presumably relies on them not returning - confirm.
    char *p = &contents[n];
    errno = 0;
    char *endptr;
    for (size_t count = 0; count < array_n; count++) {
      long value = strtol(p, &endptr, 10);
      // p == endptr: no digits found; errno: value overflowed long;
      // range test: value fits in long but not in the int array element
      if (p == endptr || errno || value < INT_MIN || value > INT_MAX)
        Handle_BadInput();
      my_array[count] = (int)value;
      p = endptr;
    }

    // Anything other than whitespace after the last number is unexpected.
    char ch;
    if (sscanf(p, " %c", &ch) == 1) Handle_ExtraInput();

    #else
    //old code 

    // Count rows in content
    number_of_rows = count_lines(contents);

    // Get array of char*, one for each line
    lines = get_lines(contents, number_of_rows);

    // Get the numbers out of the lines
    count = atoi(lines[0]); // First row has count
    my_array = (int*)malloc(count * sizeof(int));
    for (i = 0; i < count; ++i) {
       my_array[i] = atoi(lines[i + 1]);
    }
    #endif

仍然更喜歡一次讀取一個數字的可伸縮方法。

最快的方法需要大量RAM:

1)打開文件(man open)

2)使用fstat函數獲取文件的大小(man fstat)

3)用malloc分配一個緩沖區,其大小為步驟2)得到的文件大小,並將文件讀入其中(man malloc)

4)關閉文件(man close)

5)解析緩沖區,並將每個數字塊(即到下一個' '或'\0'為止的一段字符)轉換為int

編輯:如果您的RAM不夠大,則需要創建一個get_next_int函數,該函數每次只把文件中的下一個數字存入緩沖區

編輯2:您可以先讀到得知需要存儲的int數量為止,並將此數量(加上安全系數)與RAM的大小進行比較,然后選用合適的方法,這樣程序就不會把errno設置為ENOMEM——你懂我在說什么 ;)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM