繁体   English   中英

内存泄漏 - 如何为在另一个结构中作为线程参数传递的 Typdef 结构分配内存?

[英]Memory leak - How do I allocate memory for a Typdef Struct passed within another struct as thread arguments?

我对 C 非常陌生,正在努力进行适当的内存管理,并且在我的程序中遇到了许多段错误。

我的最终目标是将文本文件和输入的线程数作为用户参数,获取文件大小,然后根据用户输入的线程数拆分文件。

然后每个线程将读取文件的一部分,然后从其读取的块中提取令牌。 如果标记大于 5 个字符,则将其与标记在整个文本中出现的次数一起添加到数组中。 所以最终我希望得到文本中使用的 > 5 个前 n 个单词的列表。

然而,这可能是我第三次使用 C 了,而且我很挣扎,并且遇到了很多错误。 我一直在尝试使用 valgrind 和其他调试工具自己解决这个问题,但我很迷茫。

从 valgrind,我收到以下消息:

==27634== 1 errors in context 1 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B7B4C: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 2 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B7014: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 3 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B3875: _itoa_word (_itoa.c:179)
==27634==    by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 4 of 5:
==27634== Use of uninitialised value of size 8
==27634==    at 0x50B386B: _itoa_word (_itoa.c:179)
==27634==    by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== 
==27634== 1 errors in context 5 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634==    at 0x50B78DA: vfprintf (vfprintf.c:1642)
==27634==    by 0x50BFF25: printf (printf.c:33)
==27634==    by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634== 
==27634== ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 0 from 0)

我还看到“地址 xxx 在块大小 60 分配后为 0 字节”的消息

我相信我的问题在于:

if (strcmp(words[i].word, token) == 0) {
  inArray = i;
}

这是因为我没有为 wordStruct 单词正确分配内存吗? 我不知道如何解决它,任何帮助将不胜感激。

谢谢

完整代码如下

#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>


typedef struct {
    char word[50];
    int count;
    int totalWords;
} wordsStruct ;



struct argStruct {
    FILE *file;
    int start;
    int end;
    int count;
    wordsStruct *words;
};



int stringLength(char s[]) {
    int c = 0;
    while (s[c] != '\0')
        c++;
    return c;
}



void groomString(char *line){

    for (int i = 0; line[i] != '\0'; ++i) {
        line[i] = tolower(line[i]);

        while (!((line[i] >= 'a' && line[i] <= 'z') || line[i] == '\0')) {


            for (int j = i; line[j] != '\0'; j++) {
                line[j] = line[j + 1];
                line[j] = '\0';
            }
        }
    }
}

int counter(int n){
    static int test;
    test = n;

    if(n = 0){
        return test;
    } else {
        n++;
        return n;
    }

}


void processFile(void *input) {
    struct argStruct params = *(struct argStruct *) input;

    wordsStruct *words = params.words;
    FILE *textFile = params.file;
    int start = params.start;
    int end = params.end;
    int count = params.count;

    int size = (end - start) + 10;
    char delim[] = " \t\v\n\r";


    char *readFile = (malloc(sizeof(char) * size +10));
    fread(readFile, 1, size, textFile);


    char *copy = (malloc(sizeof(char) * size +10));
    strcpy(copy, readFile);

    char *saveptr;

    int inArray;
    int length;
    static int added;

    char *token = strtok_r(copy, delim, &saveptr);

    while (token) {

        groomString(token);
        length = stringLength(token);

        if (length > 5) {

            inArray = 0;


            for (int i = 0; i < added; i++) {
                if (strcmp(words[i].word, token) == 0) {
                    inArray = i;
                }
            }

            if (inArray == 0) {
                added++;
                strcpy(words[added].word, token);
                words[added].count = 1;
            } else {
                words[inArray].count++;
            }
        }

        token = strtok_r(NULL, delim, &saveptr);
    }


     words->totalWords = added;

    free(token);
    free(readFile);
}




int main(int argc, char *argv[])
{
    FILE *pfile;
    int threadCount = 0, fileSize = 0, divide = 0;
    wordsStruct *allWords = (wordsStruct *) malloc( sizeof(wordsStruct));


    if (argc > 2)
    {
        pfile = fopen( argv[1], "r");
        if (pfile == NULL){
            perror("FILE OPEN FAILURE");
        }

        threadCount = atoi(argv[2]);

        pthread_t * thread = malloc(sizeof(pthread_t)* threadCount *10);

        fseek(pfile, 0, SEEK_END);
        fileSize= ftell(pfile);
        fseek(pfile, 0, SEEK_SET);


        divide = (fileSize/threadCount);

        struct argStruct arguments;
        arguments.file = pfile;
        arguments.words = allWords;

        int j = 0;
        for(int i = 0; i < threadCount; i++) {

            arguments.start = j;
            arguments.end = j+divide;

            arguments.count = i;

            struct argStruct *passArgs = malloc(sizeof *passArgs);
            *passArgs = arguments;

            pthread_create(&thread[i], NULL, (void *) processFile, passArgs);

            j+=divide;
        }


        for (int i = 0; i < threadCount +1; i++){
            pthread_join(thread[i], NULL);
        }

        fclose(pfile);

    } else {
        printf("Please enter text file name and number of threads");
    }

return 0;
}

对于初学者,在最后一个线程中,您需要 [设置end ]:

if (i == (threadCount - 1)) arguments.end = fileSize;

获取最后一段中的所有字节,而不是超出 EOF。 只要文件大小不是线程数的精确倍数,就需要这样做。

为了防止传递给线程的argStruct内存泄漏,在processFile的底部,您需要free(input)

此外,线程共享某些资源(请参阅man pthreads )。 值得注意的是,打开文件描述符。 因此,您需要在访问textFile使用互斥锁。 (例如) pthread_mutex_lock(&text_mutex); 等。 阿尔。

而且,每个线程必须对它试图访问的文件部分执行自己的fseek

而且, size的“倾斜系数”为 10。这对于分配的区域来说是安全的,但会导致读取太多数据。 最好不要使用+ 10 ,或者这样做: int exact_size = end - start; . 另外,请注意,在malloc设置size额外数量时,您正在执行“slop”,因此不需要这样做。

另外,请注意fread不能fgets那样保证缓冲区末尾的 EOS 字符 (0x00)。 所以,如果你打算做缓冲的字符串操作,你需要执行这个自己(以及你需要至少1的“斜率”):

所以,我们需要:

pthread_mutex_lock(&text_mutex);

fseek(textFile,start,0);
fread(readFile,1,exact_size,textFile);   
readFile[exact_size] = 0;

pthread_mutex_unlock(&text_mutex);

而且,请记住, main有初始化text_mutexpthread_mutex_init做之前pthread_create

但...

在这里使用fread可能有问题。 当您将文件divide成长度为 div 的块时,您[可能] 以这样一种方式切碎文件,即第一个线程的最后一行被截断,而下一个线程将在看到它的第一个完整行之前看到该行的其余部分, 等等 ...

您最好在main对整个文件执行单个mmap ,并让它扫描缓冲区,寻找换行符 [或空格],并为每个线程提供一个保证与换行符对齐的段。


更新:

我编写了一个使用mmap的版本并修复了一个小错误 [请原谅无偿的样式清理]。 它编译干净,即使使用-O2 -Wall [您应该始终使用它来捕获所有警告]。 我没有测试过它,但它应该让你更进一步。

#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>

char *filebuf;

typedef struct {
    char word[50];
    int count;
    int totalWords;
} wordsStruct;

struct argStruct {
    pthread_t threadid;
    int start;
    int end;
    int count;
    wordsStruct *words;
};

int
stringLength(char s[])
{
    int c = 0;

    while (s[c] != '\0')
        c++;
    return c;
}

void
groomString(char *line)
{

    for (int i = 0; line[i] != '\0'; ++i) {
        line[i] = tolower(line[i]);

        while (!((line[i] >= 'a' && line[i] <= 'z') || line[i] == '\0')) {

            for (int j = i; line[j] != '\0'; j++) {
                line[j] = line[j + 1];
                line[j] = '\0';
            }
        }
    }
}

int
counter(int n)
{
    static int test;

    test = n;

    // NOTE/BUG: this is the assignment operator and you want equality
#if 0
    if (n = 0) {
#else
    if (n == 0) {
#endif
        return test;
    }
    else {
        n++;
        return n;
    }

}

void
processFile(void *input)
{
    struct argStruct *params = input;

    wordsStruct *words = params->words;

    int start = params->start;
    int end = params->end;
#if 0
    int count = params->count;
#endif

    int size = (end - start) + 10;
    char delim[] = " \t\v\n\r";

    char *copy = malloc(size + 1);
    memcpy(copy,&filebuf[start],size);
    copy[size] = 0;

    char *saveptr;

    int inArray;
    int length;
    static int added;

    char *token = strtok_r(copy, delim, &saveptr);

    while (token) {

        groomString(token);
        length = stringLength(token);

        if (length > 5) {

            inArray = 0;

            for (int i = 0; i < added; i++) {
                if (strcmp(words[i].word, token) == 0) {
                    inArray = i;
                }
            }

            if (inArray == 0) {
                added++;
                strcpy(words[added].word, token);
                words[added].count = 1;
            }
            else {
                words[inArray].count++;
            }
        }

        token = strtok_r(NULL, delim, &saveptr);
    }

    words->totalWords = added;

    free(copy);
    free(token);
}

int
main(int argc, char *argv[])
{
    int pfile;
    int threadCount = 0,
        fileSize = 0,
        divide = 0;
    struct stat st;
    off_t curpos;

    wordsStruct *allWords = (wordsStruct *) malloc(sizeof(wordsStruct));

    if (argc > 2) {
        pfile = open(argv[1],O_RDONLY);
        if (pfile < 0) {
            perror("FILE OPEN FAILURE");
        }

        threadCount = atoi(argv[2]);

        struct argStruct *threads =
            malloc(sizeof(struct argStruct) * threadCount);
        struct argStruct *arg;

        fstat(pfile,&st);
        fileSize = st.st_size;

        filebuf = mmap(NULL,fileSize,PROT_READ,MAP_PRIVATE,pfile,0);

        divide = (fileSize / threadCount);

#if 0
        int j = 0;
#endif

        for (int i = 0; i < threadCount; i++) {
            arg = &threads[i];

            arg->words = allWords;

            if (i == 0)
                arg->start = 0;
            else
                arg->start = arg[-1].end;

            curpos = arg->start + divide;

            for (;  curpos < fileSize;  ++curpos) {
                if (filebuf[curpos] == '\n') {
                    ++curpos;
                    break;
                }
            }

            if (curpos > fileSize)
                curpos = fileSize;

            arg->end = curpos;
            arg->count = i;
        }

        for (int i = 0; i < threadCount; i++) {
            arg = &threads[i];
            pthread_create(&arg->threadid, NULL, (void *) processFile, arg);
        }

        for (int i = 0; i < threadCount + 1; i++) {
            arg = &threads[i];
            pthread_join(arg->threadid, NULL);
        }

        munmap(filebuf,fileSize);
        close(pfile);

        free(threads);
    }
    else {
        printf("Please enter text file name and number of threads");
    }

    return 0;
}

更新#2:

哎呀,我错过了一些东西......

因为added是用static定义的,所以所有线程都尝试使用它。 他们比赛。 这将是“坏的”。 而且, params->words列表的索引将是错误的。

访问它需要线程循环顶部/底部的互斥锁/解锁对或使用原子原语(例如stdatomic.h )。

但是,互斥对会产生这样的效果,即首先获得互斥的线程将“独占”它,并且所有线程或多或少会按顺序运行。 从而破坏了拥有多个线程的目的。

因此,首先,我们要删除static以便每个线程都有自己的副本。

但是,现在事实证明,当我们向列表中添加一个新词时, words (即params->words不会“增长”。 因此,每当我们添加一个新单词时,我们都必须增加列表的大小,因此我们需要添加一个realloc调用。

在单个公共列表上执行此操作(例如,您在main分配allWords的位置)是有问题的。 由于搜索和realloc ,必须受互斥锁保护的代码的“关键部分”几乎是整个循环体。

因此,一种解决方案是让每个线程维护它自己的每个线程列表(即每个线程的params->words是不同的)。 然后,线程不会竞争并且在运行时不需要任何互斥锁。

但是,这将意味着线程之间存在重复项。

因此,在main对所有线程执行pthread_join之后, main必须重新创建一个单一的、统一的列表来消除重复项。

使用数组,这更麻烦。 链接列表可能会使重新组合各种列表变得更容易。

最简单的方法是从每个线程列表中复制所有条目,并附加到一个大列表中。

然后,对这个列表进行排序。

然后,创建一个消除重复项的新列表。

这是修复这些问题的更新版本 [再次,未经测试]:

#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>

char *filebuf;

typedef struct {
    char word[50];
    int count;
#if 0
    int totalWords;
#endif
} wordsStruct;

struct argStruct {
    pthread_t threadid;
    int start;
    int end;
    int count;
    int totalWords;
    wordsStruct *words;
};

int
stringLength(char s[])
{
    int c = 0;

    while (s[c] != '\0')
        c++;
    return c;
}

void
groomString(char *line)
{

    for (int i = 0; line[i] != '\0'; ++i) {
        line[i] = tolower(line[i]);

        while (!((line[i] >= 'a' && line[i] <= 'z') || line[i] == '\0')) {

            for (int j = i; line[j] != '\0'; j++) {
                line[j] = line[j + 1];
                line[j] = '\0';
            }
        }
    }
}

int
counter(int n)
{
    static int test;

    test = n;

    // NOTE/BUG: this is the assignment operator and you want equality
#if 0
    if (n = 0) {
#else
    if (n == 0) {
#endif
        return test;
    }
    else {
        n++;
        return n;
    }

}

void *
processFile(void *input)
{
    struct argStruct *params = input;

    int start = params->start;
    int end = params->end;

    int size = (end - start) + 10;
    char delim[] = " \t\v\n\r";

    char *copy = malloc(size + 1);
    memcpy(copy,&filebuf[start],size);
    copy[size] = 0;

    char *saveptr;

    int inArray;
    int length;

    char *token = strtok_r(copy, delim, &saveptr);

    int added = 0;
    params->words = NULL;
    params->count = 0;

    while (token) {
        groomString(token);
        length = stringLength(token);

        if (length > 5) {
            wordsStruct *words = params->words;

            // try to find an existing word struct for the current token
            inArray = 0;
            for (int i = 0; i < added; i++) {
                if (strcmp(words[i].word, token) == 0) {
                    inArray = i;
                    break;
                }
            }

            // found a token that is already in the words list -- just increment
            // the count
            if (inArray != 0) {
                ++words[inArray].count;
                continue;
            }

            // add a new word struct to the list
            ++added;
            words = realloc(words,sizeof(wordsStruct) * added);
            params->words = words;

            // fill it in and initialize its count
            words += added;
            strcpy(words->word, token);
            words->count = 1;
        }

        token = strtok_r(NULL, delim, &saveptr);
    }

    params->totalWords = added;

    free(copy);
    free(token);

    return (void *) 0;
}

int
wordcmp(const void *a,const void *b)
{
    const wordsStruct *wa = a;
    const wordsStruct *wb = b;

    int cmpflg = strcmp(wa->word,wb->word);

    return cmpflg;
}

int
main(int argc, char *argv[])
{
    int pfile;
    int threadCount = 0,
        fileSize = 0,
        divide = 0;
    struct stat st;
    off_t curpos;

#if 0
    wordsStruct *allWords = malloc(sizeof(wordsStruct));
#endif

    if (argc <= 2) {
        printf("Please enter text file name and number of threads");
        return 1;
    }

    pfile = open(argv[1],O_RDONLY);
    if (pfile < 0) {
        perror("FILE OPEN FAILURE");
    }

    threadCount = atoi(argv[2]);

    struct argStruct *threads =
        malloc(sizeof(struct argStruct) * threadCount);
    struct argStruct *arg;

    fstat(pfile,&st);
    fileSize = st.st_size;

    filebuf = mmap(NULL,fileSize,PROT_READ,MAP_PRIVATE,pfile,0);

    divide = (fileSize / threadCount);

#if 0
    int j = 0;
#endif

    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];

#if 0
        arg->words = allWords;
#endif

        if (i == 0)
            arg->start = 0;
        else
            arg->start = arg[-1].end;

        curpos = arg->start + divide;

        for (;  curpos < fileSize;  ++curpos) {
            if (filebuf[curpos] == '\n') {
                ++curpos;
                break;
            }
        }

        if (curpos > fileSize)
            curpos = fileSize;

        arg->end = curpos;
        arg->count = i;
    }

    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];
        pthread_create(&arg->threadid, NULL, (void *) processFile, arg);
    }

    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];
        pthread_join(arg->threadid, NULL);
    }

    munmap(filebuf,fileSize);
    close(pfile);

    // get total number of words in all lists
    int totalcnt = 0;
    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];
        totalcnt += arg->totalWords;
    }

    // create a unified list [that may have duplicates]
    wordsStruct *biglist = malloc(sizeof(wordsStruct) * totalcnt);
    int bigidx = 0;
    for (int i = 0; i < threadCount; i++) {
        arg = &threads[i];

        for (int wordidx = 0;  wordidx < arg->totalWords;  ++wordidx)
            biglist[bigidx++] = arg->words[wordidx];

        free(arg->words);
    }
    free(threads);

    // sort the list
    qsort(biglist,totalcnt,sizeof(wordsStruct),wordcmp);

    // remove duplicates
    int cleancnt = 0;
    wordsStruct *cleanlist = malloc(sizeof(wordsStruct) * totalcnt);

    if (totalcnt > 0)
        cleanlist[cleancnt++] = biglist[0];

    for (int bigidx = 1; bigidx < totalcnt; ++bigidx) {
        if (strcmp(cleanlist[cleancnt - 1].word,biglist[bigidx].word) == 0)
            continue;
        cleanlist[cleancnt++] = biglist[bigidx];
    }

    free(biglist);

    // trim the list
    cleanlist = realloc(cleanlist,sizeof(wordsStruct) * cleancnt);

    return 0;
}

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM