Linux 中的 C 編程：無法為找到文件中子字符串出現次數的程序獲得正確的輸出

Question

我正在編寫一個程序：它把一個文本文件（文件名由命令行給出）讀入緩沖區，然後統計命令行給出的每個子字符串在該文件中出現的次數。

當我在 bash 中運行代碼時，出現錯誤：分段錯誤（核心已轉儲）。 我仍在學習如何在這種環境中使用 C 進行編碼，並且對發生分段錯誤的原因（濫用動態內存分配？）有所了解，但我找不到它的問題。 我所能得出的結論是，問題出在 for 循環內部（我標記了代碼中潛在錯誤的產生位置）。

編輯：我設法通過將argv[j]更改為argv[i]修復了分段錯誤，但是現在運行代碼時，即使子字符串在文本文件中多次出現，count1 也總是返回 0。我已經多次閱讀代碼，仍然不確定還有什么問題。

$ more foo.txt

aabbccc

$ ./main foo.txt a

0

#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>

/*
 * Intended to print, for every pattern given in argv[2..], how many times it
 * occurs in the file named by argv[1].
 *
 * NOTE(review): this is the question's code as posted and the counting loop
 * is broken; the comments below describe what the code actually does.
 *
 * Returns 1 (or exits with status 1) on bad usage, open failure, allocation
 * failure or read failure; returns 0 otherwise.
 */
int main(int argc, char *argv[]) {

    FILE *fp; 
    long lsize; 
    char *buf;
    int count = 0, count1 = 0; 
    /* l2 is declared but never used in this function. */
    int i, j, k, l1, l2;   

    /* Need at least a file name and one pattern. */
    if (argc < 3) { printf("Error: insufficient arguments.\n"); return(1); };

    fp = fopen(argv[1], "r"); 

    if (!fp) { 
        perror(argv[1]); 
        exit(1); 
    }

    //get size of file 
    /* Seek to the end, read the offset, then seek back to the start.
       The return values of fseek/ftell are not checked. */
    fseek(fp, 0L, SEEK_END);
    lsize = ftell(fp); 
    rewind(fp);

    //allocate memory for entire content
    /* calloc zero-fills, so the extra byte at buf[lsize] already acts as the
       string terminator once the file content has been read in. */
    buf = calloc(1, lsize+1);

    if (!buf) { 
        fclose(fp); 
        fputs("Memory alloc fails.\n", stderr); 
        exit(1); 
    }

    //copy the file into the buffer
    /* Reads the file as a single item of lsize bytes; anything other than one
       complete item is treated as a failure. */
    if (1 != fread(buf, lsize, 1, fp)) {
        fclose(fp);
        free(buf); 
        fputs("Entire read fails.\n", stderr); 
        exit(1); 
    }

    /* Length up to the first zero byte in the buffer. */
    l1 = strlen(buf);

    //error is somewhere here
    /* i walks the patterns (argv[2..]), j walks the buffer. */
    for (i = 2; i < argc; i++) {
        for (j = 0; j < l1;) {
            k = 0; 
            count = 0; 
            /* This compares two ADDRESSES (a position inside buf against the
               pointer argv[k]), not characters, and k indexes the argument
               list rather than a position inside the pattern. The two
               pointers refer to different objects, so the body is not
               expected to run and count stays 0. */
            while ((&buf[j] == argv[k])) {
                count++;
                j++; 
                k++; 
            }
            /* argv is indexed with the buffer offset j here, which goes out
               of bounds once j exceeds argc (the segfault described in the
               question). Even with argv[i], count is still 0 at this point,
               so the test only succeeds for an empty pattern. */
            if (count == strlen(argv[j])) {
                count1++; 
                count = 0; 
            }
            else
                j++; 
        }
        /* count1 is never reset, so with several patterns the printed value
           is cumulative. */
        printf("%d\n", count1);
    }

    /* buf is not freed before returning. */
    fclose(fp); 

    return 0; 
}

Answer 1

fread(buf, lsize, 1, fp) 將讀取 1 個 lsize 字節的塊，但是 fread 不關心內容，也不會為字符串添加 '\0' 終止字節；如果緩沖區沒有預先清零，l1 = strlen(buf); 就會產生未定義的行為，其余結果也就不可信了（並且您的計數也有錯誤）。（本例中 buf 由 calloc(1, lsize+1) 分配，內容已全部清零，所以末尾實際上已經有終止字節；若改用 malloc，則必須自己設置。）請注意，文件通常不以 0 字節結尾，文本文件也是如此，它們通常以換行符結尾。

您必須自己設置 0 終止字節：

// fread() copies raw bytes and never appends a string terminator.
// Anything other than one complete lsize-byte item is treated as fatal.
if (1 != fread(buf, lsize, 1, fp)) {
    fclose(fp);
    free(buf);
    fputs("Entire read fails.\n", stderr);
    exit(1);
}

// Terminate with the NUL character '\0' (value 0), NOT the digit '0' (0x30).
// Storing '0' here would put a printable character in the last slot and
// leave the buffer without any terminator for strlen()/strstr().
buf[lsize] = '\0';

您可以使用strstr來獲取子字符串的位置，如下所示：

// For every pattern on the command line, count its occurrences in buf.
for(i = 2; i < argc; ++i)
{
    char *content = buf;
    int count = 0;

    // strstr() with an empty pattern always returns its first argument, so
    // without the first test the loop would step past the end of buf.
    // An empty pattern is therefore reported as 0 occurrences.
    while(argv[i][0] != '\0' && (content = strstr(content, argv[i])))
    {
        count++;
        // Resume one character after the start of this match, so
        // overlapping occurrences are counted as well.
        content++;
    }

    printf("The substring '%s' appears %d time(s)\n", argv[i], count);

}

你的計數是錯誤的，有一些錯誤。 這個比較

&buf[j] == argv[k]

錯了，你是在比較指針，而不是內容。 您必須使用strcmp來比較字符串。 在這種情況下，您將不得不使用strncmp因為您只想匹配子字符串：

// Illustration only: compares the text at buf + j with the first
// strlen(argv[k]) characters of argv[k]. Note that k selects an ARGUMENT
// here, not a character position inside the pattern.
while(strncmp(&buf[j], argv[k], strlen(argv[k])) == 0)
{
    // substring matched
}

但這也是錯誤的，因為您也在增加k ，這將為您提供下一個參數，如果子字符串長於參數數量，最后您可能會超出argv的限制。 根據您的代碼，您必須比較字符：

// Compare buf and the pattern argv[i] one character at a time.
// Stop at the end of the pattern: if a match ends exactly at the end of buf,
// both terminators compare equal and the loop would run past both strings.
while(argv[i][k] != '\0' && buf[j] == argv[i][k])
{
    j++;
    k++;
}

只有在匹配子字符串時才必須增加counter ，如下所示：

l1 = strlen(buf);

// For every pattern argv[i], try to match it at each offset j of buf.
for (i = 2; i < argc; i++) {
    int count = 0;
    int k = 0; // running index for inspecting argv[i]
    for (j = 0; j < l1; ++j) {
        // Stop at the end of the pattern. Without this test a match that
        // ends exactly at the end of buf compares the two terminators as
        // equal and keeps reading past both strings.
        while(argv[i][k] != '\0' && buf[j + k] == argv[i][k])
            k++;

        // if all characters of argv[i]
        // matched, argv[i][k] will be the
        // 0-terminating byte
        if(argv[i][k] == 0)
            count++;

        // reset running index for argv[i]
        // go to next char of buf
        k = 0;
    }

    printf("The substring '%s' appears %d time(s)\n", argv[i], count);
}

Linux 中的 C 編程：無法為找到文件中子字符串出現次數的程序獲得正確的輸出

問題描述

1 個解決方案

解決方案1
0 已采納 2018-02-20 01:37:44

Linux 中的 C 編程：無法為找到文件中子字符串出現次數的程序獲得正確的輸出

問題描述

1 個解決方案

解決方案1 0 已采納 2018-02-20 01:37:44

解決方案1
0 已采納 2018-02-20 01:37:44