內存泄漏 - 如何為在另一個結構中作為線程參數傳遞的 Typdef 結構分配內存？

Question

我對 C 非常陌生，正在努力進行適當的內存管理，並且在我的程序中遇到了許多段錯誤。

我的最終目標是將文本文件和輸入的線程數作為用戶參數，獲取文件大小，然后根據用戶輸入的線程數拆分文件。

然后每個線程將讀取文件的一部分，然后從其讀取的塊中提取令牌。 如果標記大於 5 個字符，則將其與標記在整個文本中出現的次數一起添加到數組中。 所以最終我希望得到文本中使用的 > 5 個前 n 個單詞的列表。

然而，這可能是我第三次使用 C 了，而且我很掙扎，並且遇到了很多錯誤。 我一直在嘗試使用 valgrind 和其他調試工具自己解決這個問題，但我很迷茫。

從 valgrind，我收到以下消息：

==27634== 1 errors in context 1 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B7B4C: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 2 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B7014: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 3 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B3875: _itoa_word (_itoa.c:179)
==27634==    by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 4 of 5:
==27634== Use of uninitialised value of size 8
==27634==    at 0x50B386B: _itoa_word (_itoa.c:179)
==27634==    by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 5 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B78DA: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 0 from 0)

我還看到“地址 xxx 在塊大小 60 分配后為 0 字節”的消息

我相信我的問題在於：

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}

這是因為我沒有為 wordStruct 單詞正確分配內存嗎？ 我不知道如何解決它，任何幫助將不勝感激。

謝謝

完整代碼如下

#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>


typedef struct {
    char word[50];
    int count;
    int totalWords;
} wordsStruct ;



struct argStruct {
    FILE *file;
    int start;
    int end;
    int count;
    wordsStruct *words;
};



int stringLength(char s[]) {
    int c = 0;
    while (s[c] != '\0')
        c++;
    return c;
}



void groomString(char *line){

    for (int i = 0; line[i] != '\0'; ++i) {
        line[i] = tolower(line[i]);

        while (!((line[i] >= 'a' && line[i] <= 'z') || line[i] == '\0')) {


            for (int j = i; line[j] != '\0'; j++) {
                line[j] = line[j + 1];
                line[j] = '\0';
            }
        }
    }
}

int counter(int n){
    static int test;
    test = n;

    if(n = 0){
        return test;
    } else {
        n++;
        return n;
    }

}


void processFile(void *input) {
    struct argStruct params = *(struct argStruct *) input;

    wordsStruct *words = params.words;
    FILE *textFile = params.file;
    int start = params.start;
    int end = params.end;
    int count = params.count;

    int size = (end - start) + 10;
    char delim[] = " \t\v\n\r";


    char *readFile = (malloc(sizeof(char) * size +10));
    fread(readFile, 1, size, textFile);


    char *copy = (malloc(sizeof(char) * size +10));
    strcpy(copy, readFile);

    char *saveptr;

    int inArray;
    int length;
    static int added;

    char *token = strtok_r(copy, delim, &saveptr);

    while (token) {

        groomString(token);
        length = stringLength(token);

        if (length > 5) {

            inArray = 0;


            for (int i = 0; i < added; i++) {
                if (strcmp(words[i].word, token) == 0) {
                    inArray = i;
                }
            }

            if (inArray == 0) {
                added++;
                strcpy(words[added].word, token);
                words[added].count = 1;
            } else {
                words[inArray].count++;
            }
        }

        token = strtok_r(NULL, delim, &saveptr);
    }


     words->totalWords = added;

    free(token);
    free(readFile);
}




int main(int argc, char *argv[])
{
    FILE *pfile;
    int threadCount = 0, fileSize = 0, divide = 0;
    wordsStruct *allWords = (wordsStruct *) malloc( sizeof(wordsStruct));


    if (argc > 2)
    {
        pfile = fopen( argv[1], "r");
        if (pfile == NULL){
            perror("FILE OPEN FAILURE");
        }

        threadCount = atoi(argv[2]);

        pthread_t * thread = malloc(sizeof(pthread_t)* threadCount *10);

        fseek(pfile, 0, SEEK_END);
        fileSize= ftell(pfile);
        fseek(pfile, 0, SEEK_SET);


        divide = (fileSize/threadCount);

        struct argStruct arguments;
        arguments.file = pfile;
        arguments.words = allWords;

        int j = 0;
        for(int i = 0; i < threadCount; i++) {

            arguments.start = j;
            arguments.end = j+divide;

            arguments.count = i;

            struct argStruct *passArgs = malloc(sizeof *passArgs);
            *passArgs = arguments;

            pthread_create(&thread[i], NULL, (void *) processFile, passArgs);

            j+=divide;
        }


        for (int i = 0; i < threadCount +1; i++){
            pthread_join(thread[i], NULL);
        }

        fclose(pfile);

    } else {
        printf("Please enter text file name and number of threads");
    }

return 0;
}

Answer 1

對於初學者，在最后一個線程中，您需要 [設置end ]：

if (i == (threadCount - 1)) arguments.end = fileSize;

獲取最后一段中的所有字節，而不是超出 EOF。 只要文件大小不是線程數的精確倍數，就需要這樣做。

為了防止傳遞給線程的argStruct內存泄漏，在processFile的底部，您需要free(input)

此外，線程共享某些資源（請參閱man pthreads ）。 值得注意的是，打開文件描述符。 因此，您需要在訪問textFile使用互斥鎖。 (例如) pthread_mutex_lock(&text_mutex); 等。 阿爾。

而且，每個線程必須對它試圖訪問的文件部分執行自己的fseek 。

而且， size的“傾斜系數”為 10。這對於分配的區域來說是安全的，但會導致讀取太多數據。 最好不要使用+ 10 ，或者這樣做： int exact_size = end - start; . 另外，請注意，在malloc設置size和額外數量時，您正在執行“slop”，因此不需要這樣做。

另外，請注意fread並不能像fgets那樣保證緩沖區末尾的 EOS 字符 (0x00)。 所以，如果你打算做緩沖的字符串操作，你需要執行這個自己（以及你將需要至少1的“斜率”）：

所以，我們需要：

pthread_mutex_lock(&text_mutex);

fseek(textFile,start,0);
fread(readFile,1,exact_size,textFile);   
readFile[exact_size] = 0;

pthread_mutex_unlock(&text_mutex);

而且，請記住， main有初始化text_mutex與pthread_mutex_init做之前pthread_create 。

但...

在這里使用fread可能有問題。 當您將文件divide成長度為 div 的塊時，您[可能] 以這樣一種方式切碎文件，即第一個線程的最后一行被截斷，而下一個線程將在看到它的第一個完整行之前看到該行的其余部分，等等 ...

您最好在main對整個文件執行單個mmap ，並讓它掃描緩沖區，尋找換行符 [或空格]，並為每個線程提供一個保證與換行符對齊的段。

更新：

我編寫了一個使用mmap的版本並修復了一個小錯誤 [請原諒無償的樣式清理]。 它編譯干凈，即使使用-O2 -Wall [您應該始終使用它來捕獲所有警告]。 我沒有測試過它，但它應該讓你更進一步。

#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>

char *filebuf;

typedef struct {
    char word[50];
    int count;
    int totalWords;
} wordsStruct;

struct argStruct {
    pthread_t threadid;
    int start;
    int end;
    int count;
    wordsStruct *words;
};

int
stringLength(char s[])
{
    int c = 0;

    while (s[c] != '\0')
        c++;
    return c;
}

void
groomString(char *line)
{

    for (int i = 0; line[i] != '\0'; ++i) {
        line[i] = tolower(line[i]);

        while (!((line[i] >= 'a' && line[i] <= 'z') || line[i] == '\0')) {

            for (int j = i; line[j] != '\0'; j++) {
                line[j] = line[j + 1];
                line[j] = '\0';
            }
        }
    }
}

int
counter(int n)
{
    static int test;

    test = n;

    // NOTE/BUG: this is the assignment operator and you want equality
#if 0
    if (n = 0) {
#else
    if (n == 0) {
#endif
        return test;
    }
    else {
        n++;
        return n;
    }

}

void
processFile(void *input)
{
    struct argStruct *params = input;

    wordsStruct *words = params->words;

    int start = params->start;
    int end = params->end;
#if 0
    int count = params->count;
#endif

    int size = (end - start) + 10;
    char delim[] = " \t\v\n\r";

    char *copy = malloc(size + 1);
    memcpy(copy,&filebuf[start],size);
    copy[size] = 0;

    char *saveptr;

    int inArray;
    int length;
    static int added;

    char *token = strtok_r(copy, delim, &saveptr);

    while (token) {

        groomString(token);
        length = stringLength(token);

        if (length > 5) {

            inArray = 0;

            for (int i = 0; i < added; i++) {
                if (strcmp(words[i].word, token) == 0) {
                    inArray = i;
                }
            }

            if (inArray == 0) {
                added++;
                strcpy(words[added].word, token);
                words[added].count = 1;
            }
            else {
                words[inArray].count++;
            }
        }

        token = strtok_r(NULL, delim, &saveptr);
    }

    words->totalWords = added;

    free(copy);
    free(token);
}

int
main(int argc, char *argv[])
{
    int pfile;
    int threadCount = 0,
        fileSize = 0,
        divide = 0;
    struct stat st;
    off_t curpos;

    wordsStruct *allWords = (wordsStruct *) malloc(sizeof(wordsStruct));

    if (argc > 2) {
        pfile = open(argv[1],O_RDONLY);
        if (pfile < 0) {
            perror("FILE OPEN FAILURE");
        }

        threadCount = atoi(argv[2]);

        struct argStruct *threads =
            malloc(sizeof(struct argStruct) * threadCount);
        struct argStruct *arg;

        fstat(pfile,&st);
        fileSize = st.st_size;

        filebuf = mmap(NULL,fileSize,PROT_READ,MAP_PRIVATE,pfile,0);

        divide = (fileSize / threadCount);

#if 0
        int j = 0;
#endif

        for (int i = 0; i < threadCount; i++) {
            arg = &threads[i];

            arg->words = allWords;

            if (i == 0)
                arg->start = 0;
            else
                arg->start = arg[-1].end;

            curpos = arg->start + divide;

            for (;  curpos < fileSize;  ++curpos) {
                if (filebuf[curpos] == '\n') {
                    ++curpos;
                    break;
                }
            }

            if (curpos > fileSize)
                curpos = fileSize;

            arg->end = curpos;
            arg->count = i;
        }

        for (int i = 0; i < threadCount; i++) {
            arg = &threads[i];
            pthread_create(&arg->threadid, NULL, (void *) processFile, arg);
        }

        for (int i = 0; i < threadCount + 1; i++) {
            arg = &threads[i];
            pthread_join(arg->threadid, NULL);
        }

        munmap(filebuf,fileSize);
        close(pfile);

        free(threads);
    }
    else {
        printf("Please enter text file name and number of threads");
    }

    return 0;
}

更新#2：

哎呀，我錯過了一些東西......

因為added是用static定義的，所以所有線程都嘗試使用它。 他們比賽。 這將是“壞的”。 而且， params->words列表的索引將是錯誤的。

訪問它需要線程循環頂部/底部的互斥鎖/解鎖對或使用原子原語（例如stdatomic.h ）。

但是，互斥對會產生這樣的效果，即首先獲得互斥的線程將“獨占”它，並且所有線程或多或少會按順序運行。 從而破壞了擁有多個線程的目的。

因此，首先，我們要刪除static以便每個線程都有自己的副本。

但是，現在事實證明，當我們向列表中添加一個新詞時， words （即params->words ）不會“增長”。 因此，每當我們添加一個新單詞時，我們都必須增加列表的大小，因此我們需要添加一個realloc調用。

在單個公共列表上執行此操作（例如，您在main分配allWords的位置）是有問題的。 由於搜索和realloc ，必須受互斥鎖保護的代碼的“關鍵部分”幾乎是整個循環體。

因此，一種解決方案是讓每個線程維護它自己的每個線程列表（即每個線程的params->words是不同的）。 然后，線程不會競爭並且在運行時不需要任何互斥鎖。

但是，這將意味着線程之間存在重復項。

因此，在main對所有線程執行pthread_join之后， main必須重新創建一個單一的、統一的列表來消除重復項。

使用數組，這更麻煩。 鏈接列表可能會使重新組合各種列表變得更容易。

最簡單的方法是從每個線程列表中復制所有條目，並附加到一個大列表中。

然后，對這個列表進行排序。

然后，創建一個消除重復項的新列表。

這是修復這些問題的更新版本 [再次，未經測試]：

#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>

char *filebuf;

typedef struct {
    char word[50];
    int count;
#if 0
    int totalWords;
#endif
} wordsStruct;

struct argStruct {
    pthread_t threadid;
    int start;
    int end;
    int count;
    int totalWords;
    wordsStruct *words;
};

int
stringLength(char s[])
{
    int c = 0;

    while (s[c] != '\0')
        c++;
    return c;
}

void
groomString(char *line)
{

    for (int i = 0; line[i] != '\0'; ++i) {
        line[i] = tolower(line[i]);

        while (!((line[i] >= 'a' && line[i] <= 'z') || line[i] == '\0')) {

            for (int j = i; line[j] != '\0'; j++) {
                line[j] = line[j + 1];
                line[j] = '\0';
            }
        }
    }
}

int
counter(int n)
{
    static int test;

    test = n;

    // NOTE/BUG: this is the assignment operator and you want equality
#if 0
    if (n = 0) {
#else
    if (n == 0) {
#endif
        return test;
    }
    else {
        n++;
        return n;
    }

}

void *
processFile(void *input)
{
    struct argStruct *params = input;

    int start = params->start;
    int end = params->end;

    int size = (end - start) + 10;
    char delim[] = " \t\v\n\r";

    char *copy = malloc(size + 1);
    memcpy(copy,&filebuf[start],size);
    copy[size] = 0;

    char *saveptr;

    int inArray;
    int length;

    char *token = strtok_r(copy, delim, &saveptr);

    int added = 0;
    params->words = NULL;
    params->count = 0;

    while (token) {
        groomString(token);
        length = stringLength(token);

        if (length > 5) {
            wordsStruct *words = params->words;

            // try to find an existing word struct for the current token
            inArray = 0;
            for (int i = 0; i < added; i++) {
                if (strcmp(words[i].word, token) == 0) {
                    inArray = i;
                    break;
                }
            }

            // found a token that is already in the words list -- just increment
            // the count
            if (inArray != 0) {
                ++words[inArray].count;
                continue;
            }

            // add a new word struct to the list
            ++added;
            words = realloc(words,sizeof(wordsStruct) * added);
            params->words = words;

            // fill it in and initialize its count
            words += added;
            strcpy(words->word, token);
            words->count = 1;
        }

        token = strtok_r(NULL, delim, &saveptr);
    }

    params->totalWords = added;

    free(copy);
    free(token);

    return (void *) 0;
}

int
wordcmp(const void *a,const void *b)
{
    const wordsStruct *wa = a;
    const wordsStruct *wb = b;

    int cmpflg = strcmp(wa->word,wb->word);

    return cmpflg;
}

int
main(int argc, char *argv[])
{
    int pfile;
    int threadCount = 0,
        fileSize = 0,
        divide = 0;
    struct stat st;
    off_t curpos;

#if 0
    wordsStruct *allWords = malloc(sizeof(wordsStruct));
#endif

    if (argc <= 2) {
        printf("Please enter text file name and number of threads");
        return 1;
    }

    pfile = open(argv[1],O_RDONLY);
    if (pfile < 0) {
        perror("FILE OPEN FAILURE");
    }

    threadCount = atoi(argv[2]);

    struct argStruct *threads =
        malloc(sizeof(struct argStruct) * threadCount);
    struct argStruct *arg;

    fstat(pfile,&st);
    fileSize = st.st_size;

    filebuf = mmap(NULL,fileSize,PROT_READ,MAP_PRIVATE,pfile,0);

    divide = (fileSize / threadCount);

#if 0
    int j = 0;
#endif

    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];

#if 0
        arg->words = allWords;
#endif

        if (i == 0)
            arg->start = 0;
        else
            arg->start = arg[-1].end;

        curpos = arg->start + divide;

        for (;  curpos < fileSize;  ++curpos) {
            if (filebuf[curpos] == '\n') {
                ++curpos;
                break;
            }
        }

        if (curpos > fileSize)
            curpos = fileSize;

        arg->end = curpos;
        arg->count = i;
    }

    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];
        pthread_create(&arg->threadid, NULL, (void *) processFile, arg);
    }

    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];
        pthread_join(arg->threadid, NULL);
    }

    munmap(filebuf,fileSize);
    close(pfile);

    // get total number of words in all lists
    int totalcnt = 0;
    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];
        totalcnt += arg->totalWords;
    }

    // create a unified list [that may have duplicates]
    wordsStruct *biglist = malloc(sizeof(wordsStruct) * totalcnt);
    int bigidx = 0;
    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];

        for (int wordidx = 0;  wordidx < arg->totalWords;  ++wordidx)
            biglist[bigidx++] = arg->words[wordidx];

        free(arg->words);
    }
    free(threads);

    // sort the list
    qsort(biglist,totalcnt,sizeof(wordsStruct),wordcmp);

    // remove duplicates
    int cleancnt = 0;
    wordsStruct *cleanlist = malloc(sizeof(wordsStruct) * totalcnt);

    if (totalcnt > 0)
        cleanlist[cleancnt++] = biglist[0];

    for (int bigidx = 1; bigidx < totalcnt; ++bigidx) {
        if (strcmp(cleanlist[cleancnt - 1].word,biglist[bigidx].word) == 0)
            continue;
        cleanlist[cleancnt++] = biglist[bigidx];
    }

    free(biglist);

    // trim the list
    cleanlist = realloc(cleanlist,sizeof(wordsStruct) * cleancnt);

    return 0;
}

內存泄漏 - 如何為在另一個結構中作為線程參數傳遞的 Typdef 結構分配內存？

問題描述

1 個解決方案

解決方案1
2 已采納 2020-03-20 19:22:07

內存泄漏 - 如何為在另一個結構中作為線程參數傳遞的 Typdef 結構分配內存？

問題描述

1 個解決方案

解決方案1 2 已采納 2020-03-20 19:22:07

解決方案1
2 已采納 2020-03-20 19:22:07