[英]Memory leak - How do I allocate memory for a typedef struct passed within another struct as thread arguments?
我對 C 非常陌生,正在努力進行適當的內存管理,並且在我的程序中遇到了許多段錯誤。
我的最終目標是將文本文件和輸入的線程數作為用戶參數,獲取文件大小,然后根據用戶輸入的線程數拆分文件。
然后每個線程讀取文件的一部分,並從讀到的塊中提取標記(token)。如果某個標記的長度大於 5 個字符,就把它連同它在整個文本中出現的次數一起加入數組。最終我希望得到文本中長度超過 5 個字符、出現次數最多的前 n 個單詞的列表。
然而,這可能是我第三次使用 C 了,而且我很掙扎,並且遇到了很多錯誤。 我一直在嘗試使用 valgrind 和其他調試工具自己解決這個問題,但我很迷茫。
從 valgrind,我收到以下消息:
==27634== 1 errors in context 1 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B7B4C: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 2 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B7014: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 3 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B3875: _itoa_word (_itoa.c:179)
==27634== by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 4 of 5:
==27634== Use of uninitialised value of size 8
==27634== at 0x50B386B: _itoa_word (_itoa.c:179)
==27634== by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 5 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B78DA: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634== ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 0 from 0)
我還看到“Address xxx is 0 bytes after a block of size 60 alloc'd”(地址 xxx 位於一個已分配的 60 字節塊之后 0 字節處)的消息
我相信我的問題在於:
if (strcmp(words[i].word, token) == 0) {
inArray = i;
}
這是因為我沒有為 wordStruct 單詞正確分配內存嗎? 我不知道如何解決它,任何幫助將不勝感激。
謝謝
完整代碼如下
#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
/* One slot of the word table that processFile fills in. */
typedef struct {
/* word text, copied in with strcpy; the buffer holds at most 49 characters plus the NUL */
char word[50];
/* occurrences seen: set to 1 when the word is stored, incremented when it is found again */
int count;
/* processFile stores its running `added` counter here, on the first array element only */
int totalWords;
} wordsStruct ;
/* Argument bundle for one thread; main heap-allocates a copy per thread and processFile copies it out. */
struct argStruct {
/* stream opened by main; every thread is handed the same FILE pointer */
FILE *file;
/* offset of the first byte of this thread's slice (main uses i * divide) */
int start;
/* offset just past the slice (main uses start + divide) */
int end;
/* index of the thread, taken from main's loop counter */
int count;
/* word table; main passes the same allWords pointer to every thread */
wordsStruct *words;
};
/* Return the number of characters in s that precede its terminating NUL. */
int stringLength(char s[]) {
    const char *cursor = s;

    while (*cursor != '\0') {
        ++cursor;
    }
    return (int) (cursor - s);
}
/*
 * Normalize a token in place: lower-case it and drop every character that is
 * not an ASCII letter (digits, punctuation, non-ASCII bytes).
 *
 * line must be a writable NUL-terminated string. The result is never longer
 * than the input, so the caller's buffer is always large enough.
 *
 * The earlier implementation stored '\0' inside its shift loop, which cut the
 * string off at the first non-letter: "don't" became "don" and "\"hello\""
 * became the empty string.
 */
void groomString(char *line){
    char *out = line;

    for (const char *in = line; *in != '\0'; ++in) {
        /* tolower() is undefined for negative char values, hence the cast */
        int c = tolower((unsigned char) *in);

        if (c >= 'a' && c <= 'z') {
            *out++ = (char) c;
        }
    }
    *out = '\0';
}
/*
 * Return 0 when n is 0, otherwise n + 1.
 *
 * The earlier condition was `if (n = 0)`: an assignment, which is always
 * false and also reset n, so every call returned 1 regardless of its
 * argument. The function-static scratch variable only mirrored n and has
 * been removed.
 */
int counter(int n){
    if (n == 0) {
        return 0;
    }
    return n + 1;
}
/*
 * Thread body: read a chunk of the shared text file, split it on whitespace,
 * clean each token with groomString, and record tokens longer than five
 * characters in the word table.
 *
 * input points to a struct argStruct; it is copied by value on entry.
 * The word table itself (params.words) is updated in place.
 */
void processFile(void *input) {
struct argStruct params = *(struct argStruct *) input;
wordsStruct *words = params.words;
FILE *textFile = params.file;
int start = params.start;
int end = params.end;
int count = params.count;
/* slice length plus 10 extra bytes; the two buffers below add a further 10 */
int size = (end - start) + 10;
char delim[] = " \t\v\n\r";
char *readFile = (malloc(sizeof(char) * size +10));
/* NOTE(review): reads from wherever the shared FILE currently is - no fseek to `start` happens here */
fread(readFile, 1, size, textFile);
char *copy = (malloc(sizeof(char) * size +10));
/* NOTE(review): fread does not NUL-terminate, so this strcpy runs until it meets a zero byte */
strcpy(copy, readFile);
char *saveptr;
int inArray;
int length;
/* function-static: a single counter shared by every thread running this function, with no locking */
static int added;
char *token = strtok_r(copy, delim, &saveptr);
while (token) {
groomString(token);
length = stringLength(token);
if (length > 5) {
/* 0 doubles as the "not found" marker */
inArray = 0;
/* scans slots 0 .. added-1; a later match overwrites an earlier one */
for (int i = 0; i < added; i++) {
if (strcmp(words[i].word, token) == 0) {
inArray = i;
}
}
if (inArray == 0) {
/* NOTE(review): incrementing first means slot 0 is never filled and the new word lands at index `added` */
added++;
/* NOTE(review): main in this listing allocates room for one wordsStruct only */
strcpy(words[added].word, token);
words[added].count = 1;
} else {
words[inArray].count++;
}
}
token = strtok_r(NULL, delim, &saveptr);
}
words->totalWords = added;
/* token is NULL once the loop has finished, so this frees nothing; `copy` and `input` are not released here */
free(token);
free(readFile);
}
/*
 * Program entry. argv[1] is the text file, argv[2] the number of threads.
 * Measures the file, gives every thread an equal-sized byte range, starts the
 * threads on processFile and waits for them. Always returns 0.
 */
int main(int argc, char *argv[])
{
FILE *pfile;
int threadCount = 0, fileSize = 0, divide = 0;
/* room for exactly one wordsStruct; the contents are not initialised */
wordsStruct *allWords = (wordsStruct *) malloc( sizeof(wordsStruct));
if (argc > 2)
{
pfile = fopen( argv[1], "r");
if (pfile == NULL){
/* NOTE(review): the failure is reported but execution carries on with a NULL stream */
perror("FILE OPEN FAILURE");
}
threadCount = atoi(argv[2]);
/* sized for threadCount * 10 thread ids */
pthread_t * thread = malloc(sizeof(pthread_t)* threadCount *10);
/* seek to the end and back to learn the file size */
fseek(pfile, 0, SEEK_END);
fileSize= ftell(pfile);
fseek(pfile, 0, SEEK_SET);
/* integer division: any remainder bytes fall outside the last range */
divide = (fileSize/threadCount);
struct argStruct arguments;
arguments.file = pfile;
arguments.words = allWords;
int j = 0;
for(int i = 0; i < threadCount; i++) {
arguments.start = j;
arguments.end = j+divide;
arguments.count = i;
/* each thread gets its own heap copy of the arguments; it is not freed in this function */
struct argStruct *passArgs = malloc(sizeof *passArgs);
*passArgs = arguments;
/* processFile is declared as returning void, hence the cast */
pthread_create(&thread[i], NULL, (void *) processFile, passArgs);
j+=divide;
}
/* NOTE(review): runs threadCount + 1 times, so the final pass joins thread[threadCount], which pthread_create never filled in */
for (int i = 0; i < threadCount +1; i++){
pthread_join(thread[i], NULL);
}
fclose(pfile);
} else {
printf("Please enter text file name and number of threads");
}
return 0;
}
首先,對於最后一個線程,您需要把 `end` 設置為文件大小:
if (i == (threadCount - 1)) arguments.end = fileSize;
這樣才能取得最后一段中的所有字節,又不會超出 EOF。只要文件大小不是線程數的整數倍,就需要這樣做。
為了防止傳遞給線程的 `argStruct` 發生內存泄漏,您需要在 `processFile` 的末尾調用 `free(input)`。
此外,線程之間會共享某些資源(請參閱 `man pthreads`),其中值得注意的是已打開的文件描述符。因此,您需要在訪問 `textFile` 時使用互斥鎖,例如 `pthread_mutex_lock(&text_mutex);` 等等。
而且,每個線程必須針對自己要訪問的那部分文件自行執行 `fseek`。
而且,`size` 帶有 10 的“余量(slop factor)”。這對所分配的緩沖區來說是安全的,但會導致讀入過多的數據。最好去掉 `+ 10`,或者另外定義:`int exact_size = end - start;`。另外請注意,您在設置 `size` 時已經加了一次余量,在 `malloc` 中又額外加了一次,因此后者是不必要的。
另外請注意,`fread` 不像 `fgets` 那樣保證緩沖區末尾有字符串結束符 EOS(0x00)。所以,如果您打算對緩沖區做字符串操作,就需要自己補上結束符(因此至少需要 1 個字節的“余量”):
所以,我們需要:
pthread_mutex_lock(&text_mutex);
fseek(textFile,start,0);
fread(readFile,1,exact_size,textFile);
readFile[exact_size] = 0;
pthread_mutex_unlock(&text_mutex);
而且請記住,`main` 必須在調用 `pthread_create` 之前用 `pthread_mutex_init` 初始化 `text_mutex`。
但...
在這里使用 `fread` 可能有問題。當您把文件切分成長度為 `divide` 的塊時,[很可能] 會把文件切成這樣:第一個線程的最后一行被截斷,而下一個線程在看到它的第一個完整行之前,會先看到那一行的剩余部分,依此類推……
更好的做法是在 `main` 中對整個文件執行一次 `mmap`,由 `main` 掃描緩沖區、尋找換行符 [或空白字符],然后為每個線程提供一個保證在換行符處對齊的段。
更新:
我編寫了一個使用 `mmap` 的版本,並修復了一個小錯誤 [請原諒我順手做的樣式清理]。即使加上 `-O2 -Wall` [您應該始終使用這些選項來捕獲警告],它也能干凈地編譯通過。我沒有測試過它,但它應該能讓您更進一步。
#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>
// Base address of the read-only mapping of the input file: set by main via mmap, read by processFile.
char *filebuf;
// One slot of the word table that processFile fills in.
typedef struct {
// word text, copied in with strcpy; holds at most 49 characters plus the NUL
char word[50];
// occurrences seen: 1 when the word is stored, incremented when it is found again
int count;
// processFile stores its running `added` counter here, on the first array element only
int totalWords;
} wordsStruct;
// Per-thread control block; main keeps one array element per thread and passes its address to processFile.
struct argStruct {
// thread id written by pthread_create and later handed to pthread_join
pthread_t threadid;
// offset into filebuf of the first byte of this thread's segment
int start;
// offset just past the segment; main advances it to one past the next '\n'
int end;
// index of the thread (main's loop counter)
int count;
// word table; main points every thread at the same allWords allocation
wordsStruct *words;
};
// Count the characters of s up to, but not including, the terminating NUL.
int
stringLength(char s[])
{
    int len;

    for (len = 0; s[len] != '\0'; ++len)
        continue;
    return len;
}
// Normalize a token in place: lower-case it and drop every character that is
// not an ASCII letter (digits, punctuation, non-ASCII bytes).
//
// line must be a writable NUL-terminated string; the result is never longer
// than the input.
//
// The earlier implementation stored '\0' inside its shift loop, which cut the
// string off at the first non-letter ("don't" became "don", "\"hello\""
// became the empty string).
void
groomString(char *line)
{
    char *dst = line;

    for (const char *src = line; *src != '\0'; ++src) {
        // tolower() is undefined for negative char values, hence the cast
        int ch = tolower((unsigned char) *src);

        if (ch >= 'a' && ch <= 'z')
            *dst++ = (char) ch;
    }
    *dst = '\0';
}
// Return 0 for n == 0 and n + 1 for every other n.
int
counter(int n)
{
    static int test;

    test = n;
    // The question's code tested `n = 0` here, which is an assignment;
    // the equality comparison is what was intended.
    return (n == 0) ? test : n + 1;
}
// Thread body: copy this thread's segment out of the mapped file, split it on
// whitespace, clean each token with groomString, and record tokens longer than
// five characters in the word table.
//
// input points to the caller's struct argStruct (used in place, not copied).
void
processFile(void *input)
{
struct argStruct *params = input;
wordsStruct *words = params->words;
int start = params->start;
int end = params->end;
#if 0
int count = params->count;
#endif
// NOTE(review): 10 bytes more than the segment; for the last segment, whose
// end main clamps to fileSize, the memcpy below reaches past the mapped length
int size = (end - start) + 10;
char delim[] = " \t\v\n\r";
char *copy = malloc(size + 1);
memcpy(copy,&filebuf[start],size);
// strtok_r needs a terminated, writable buffer; the mapping is read-only
copy[size] = 0;
char *saveptr;
int inArray;
int length;
// function-static: a single counter shared by every thread, with no locking
static int added;
char *token = strtok_r(copy, delim, &saveptr);
while (token) {
groomString(token);
length = stringLength(token);
if (length > 5) {
// 0 doubles as the "not found" marker
inArray = 0;
// scans slots 0 .. added-1; a later match overwrites an earlier one
for (int i = 0; i < added; i++) {
if (strcmp(words[i].word, token) == 0) {
inArray = i;
}
}
if (inArray == 0) {
// NOTE(review): incrementing first means slot 0 is never filled and the
// new word lands at index `added`; main allocates one wordsStruct only
added++;
strcpy(words[added].word, token);
words[added].count = 1;
}
else {
words[inArray].count++;
}
}
token = strtok_r(NULL, delim, &saveptr);
}
words->totalWords = added;
free(copy);
// token is NULL once the loop has finished, so this frees nothing
free(token);
}
// Program entry: map the text file, cut it into newline-aligned segments and
// run one processFile thread per segment.
//
// usage: <prog> <text-file> <thread-count>
//
// Returns 0 on success (and after the usage message, as before) and 1 when an
// argument is invalid or a system call fails.
//
// Fixes relative to the earlier listing:
//   - the join loop ran threadCount + 1 times, reading one element past the
//     threads array and joining a thread that was never created;
//   - a thread count <= 0 is rejected (it caused a division by zero);
//   - open/fstat/mmap/malloc/pthread_create failures are detected;
//   - the last segment always runs to the end of the file;
//   - allWords is released.
int
main(int argc, char *argv[])
{
    if (argc <= 2) {
        printf("Please enter text file name and number of threads");
        return 0;
    }

    int threadCount = atoi(argv[2]);
    if (threadCount <= 0) {
        fprintf(stderr, "thread count must be a positive integer\n");
        return 1;
    }

    int pfile = open(argv[1], O_RDONLY);
    if (pfile < 0) {
        perror("FILE OPEN FAILURE");
        return 1;
    }

    struct stat st;
    if (fstat(pfile, &st) < 0) {
        perror("fstat");
        close(pfile);
        return 1;
    }

    // segment offsets are stored as int, so the size must survive the round trip
    int fileSize = (int) st.st_size;
    if (fileSize != st.st_size) {
        fprintf(stderr, "file is too large\n");
        close(pfile);
        return 1;
    }
    if (fileSize == 0) {
        // nothing to scan, and mmap rejects a zero-length mapping
        close(pfile);
        return 0;
    }

    filebuf = mmap(NULL, (size_t) fileSize, PROT_READ, MAP_PRIVATE, pfile, 0);
    if (filebuf == MAP_FAILED) {
        perror("mmap");
        close(pfile);
        return 1;
    }

    int status = 1;
    int created = 0;
    struct argStruct *threads = malloc(sizeof *threads * (size_t) threadCount);
    // still a single zeroed wordsStruct, shared by every thread, as before
    wordsStruct *allWords = calloc(1, sizeof *allWords);

    if (threads == NULL || allWords == NULL) {
        perror("malloc");
        goto done;
    }

    int divide = fileSize / threadCount;

    for (int i = 0; i < threadCount; i++) {
        struct argStruct *arg = &threads[i];

        arg->words = allWords;
        arg->count = i;
        arg->start = (i == 0) ? 0 : arg[-1].end;

        if (i == threadCount - 1) {
            // the last thread takes whatever is left
            arg->end = fileSize;
            continue;
        }

        // tentative end, then extend to just past the next newline so that
        // no line is split between two threads
        int pos = (arg->start > fileSize - divide) ? fileSize
                                                   : arg->start + divide;
        while (pos < fileSize && filebuf[pos] != '\n')
            ++pos;
        if (pos < fileSize)
            ++pos;
        arg->end = pos;
    }

    for (; created < threadCount; created++) {
        struct argStruct *arg = &threads[created];
        // processFile is declared as returning void, hence the cast
        int err = pthread_create(&arg->threadid, NULL,
                                 (void *(*)(void *)) processFile, arg);

        if (err != 0) {
            fprintf(stderr, "pthread_create: %s\n", strerror(err));
            break;
        }
    }
    status = (created == threadCount) ? 0 : 1;

done:
    // join exactly the threads that were started
    for (int i = 0; i < created; i++)
        pthread_join(threads[i].threadid, NULL);

    munmap(filebuf, (size_t) fileSize);
    close(pfile);
    free(threads);
    free(allWords);
    return status;
}
更新#2:
哎呀,我錯過了一些東西......
因為 `added` 是用 `static` 定義的,所以所有線程都會共用它,從而產生競爭(race),結果會很“糟糕”:`params->words` 列表的索引會出錯。
要安全地訪問它,需要在線程循環的開頭/結尾加上一對互斥鎖的加鎖/解鎖,或者使用原子原語(例如 `stdatomic.h`)。
但是,互斥對會產生這樣的效果,即首先獲得互斥的線程將“獨占”它,並且所有線程或多或少會按順序運行。 從而破壞了擁有多個線程的目的。
因此,首先要去掉 `static`,讓每個線程都有自己的副本。
但接著會發現,向列表中添加新單詞時,`words`(即 `params->words`)並不會“增長”。因此每添加一個新單詞,都必須擴大列表的容量,也就是需要增加一次 `realloc` 調用。
在單個公共列表(例如您在 `main` 中分配的 `allWords`)上這樣做是有問題的:由於要搜索和 `realloc`,必須受互斥鎖保護的“臨界區”幾乎就是整個循環體。
因此,一種解決方案是讓每個線程維護自己的列表(即每個線程的 `params->words` 各不相同)。這樣線程之間就不會競爭,運行時也不需要任何互斥鎖。
但是,這將意味着線程之間存在重復項。
因此,在 `main` 對所有線程執行完 `pthread_join` 之后,`main` 必須重新構建一個單一的、統一的列表,並消除其中的重復項。
使用數組做這件事比較麻煩;改用鏈表可能會讓合並各個列表變得更容易。
最簡單的方法是從每個線程列表中復制所有條目,並附加到一個大列表中。
然后,對這個列表進行排序。
然后,創建一個消除重復項的新列表。
這是修復這些問題的更新版本 [再次,未經測試]:
#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>
// Base address of the read-only mapping of the input file: set by main via mmap, read by processFile.
char *filebuf;
// One entry of a word list built by processFile.
typedef struct {
// word text, copied in with strcpy; holds at most 49 characters plus the NUL
char word[50];
// number of occurrences counted for this word
int count;
// disabled: the entry total is kept in struct argStruct in this version
#if 0
int totalWords;
#endif
} wordsStruct;
// Per-thread control block; main keeps one array element per thread and passes its address to processFile.
struct argStruct {
// thread id written by pthread_create and later handed to pthread_join
pthread_t threadid;
// offset into filebuf of the first byte of this thread's segment
int start;
// offset just past the segment; main advances it to one past the next '\n'
int end;
// set to the thread index by main; processFile resets it to 0
int count;
// number of entries processFile stored in `words`
int totalWords;
// this thread's own word list: grown with realloc by processFile, freed by main
wordsStruct *words;
};
// Length of s in characters, not counting the terminating NUL.
int
stringLength(char s[])
{
    int n = 0;

    for (const char *p = s; *p != '\0'; ++p)
        ++n;
    return n;
}
// Normalize a token in place: lower-case it and keep ASCII letters only;
// every other character (digits, punctuation, non-ASCII bytes) is removed.
//
// line must be a writable NUL-terminated string. The output is written over
// the input and can only be shorter, so no extra space is required.
//
// The earlier implementation stored '\0' inside its shift loop, which cut the
// string off at the first non-letter ("don't" became "don", "\"hello\""
// became the empty string).
void
groomString(char *line)
{
    size_t kept = 0;

    for (size_t i = 0; line[i] != '\0'; ++i) {
        // tolower() is undefined for negative char values, hence the cast
        int lower = tolower((unsigned char) line[i]);

        if (lower >= 'a' && lower <= 'z')
            line[kept++] = (char) lower;
    }
    line[kept] = '\0';
}
// Return 0 for n == 0 and n + 1 for every other n.
int
counter(int n)
{
    static int test;

    test = n;
    // The question's code tested `n = 0` (assignment) at this point;
    // equality is what was meant.
    if (n != 0)
        return n + 1;
    return test;
}
void *
processFile(void *input)
{
struct argStruct *params = input;
int start = params->start;
int end = params->end;
int size = (end - start) + 10;
char delim[] = " \t\v\n\r";
char *copy = malloc(size + 1);
memcpy(copy,&filebuf[start],size);
copy[size] = 0;
char *saveptr;
int inArray;
int length;
char *token = strtok_r(copy, delim, &saveptr);
int added = 0;
params->words = NULL;
params->count = 0;
while (token) {
groomString(token);
length = stringLength(token);
if (length > 5) {
wordsStruct *words = params->words;
// try to find an existing word struct for the current token
inArray = 0;
for (int i = 0; i < added; i++) {
if (strcmp(words[i].word, token) == 0) {
inArray = i;
break;
}
}
// found a token that is already in the words list -- just increment
// the count
if (inArray != 0) {
++words[inArray].count;
continue;
}
// add a new word struct to the list
++added;
words = realloc(words,sizeof(wordsStruct) * added);
params->words = words;
// fill it in and initialize its count
words += added;
strcpy(words->word, token);
words->count = 1;
}
token = strtok_r(NULL, delim, &saveptr);
}
params->totalWords = added;
free(copy);
free(token);
return (void *) 0;
}
// qsort comparator: order two wordsStruct entries by their word text.
int
wordcmp(const void *a,const void *b)
{
    const wordsStruct *lhs = a;
    const wordsStruct *rhs = b;

    return strcmp(lhs->word, rhs->word);
}
int
main(int argc, char *argv[])
{
int pfile;
int threadCount = 0,
fileSize = 0,
divide = 0;
struct stat st;
off_t curpos;
#if 0
wordsStruct *allWords = malloc(sizeof(wordsStruct));
#endif
if (argc <= 2) {
printf("Please enter text file name and number of threads");
return 1;
}
pfile = open(argv[1],O_RDONLY);
if (pfile < 0) {
perror("FILE OPEN FAILURE");
}
threadCount = atoi(argv[2]);
struct argStruct *threads =
malloc(sizeof(struct argStruct) * threadCount);
struct argStruct *arg;
fstat(pfile,&st);
fileSize = st.st_size;
filebuf = mmap(NULL,fileSize,PROT_READ,MAP_PRIVATE,pfile,0);
divide = (fileSize / threadCount);
#if 0
int j = 0;
#endif
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
#if 0
arg->words = allWords;
#endif
if (i == 0)
arg->start = 0;
else
arg->start = arg[-1].end;
curpos = arg->start + divide;
for (; curpos < fileSize; ++curpos) {
if (filebuf[curpos] == '\n') {
++curpos;
break;
}
}
if (curpos > fileSize)
curpos = fileSize;
arg->end = curpos;
arg->count = i;
}
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
pthread_create(&arg->threadid, NULL, (void *) processFile, arg);
}
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
pthread_join(arg->threadid, NULL);
}
munmap(filebuf,fileSize);
close(pfile);
// get total number of words in all lists
int totalcnt = 0;
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
totalcnt += arg->totalWords;
}
// create a unified list [that may have duplicates]
wordsStruct *biglist = malloc(sizeof(wordsStruct) * totalcnt);
int bigidx = 0;
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
for (int wordidx = 0; wordidx < arg->totalWords; ++wordidx)
biglist[bigidx++] = arg->words[wordidx];
free(arg->words);
}
free(threads);
// sort the list
qsort(biglist,totalcnt,sizeof(wordsStruct),wordcmp);
// remove duplicates
int cleancnt = 0;
wordsStruct *cleanlist = malloc(sizeof(wordsStruct) * totalcnt);
if (totalcnt > 0)
cleanlist[cleancnt++] = biglist[0];
for (int bigidx = 1; bigidx < totalcnt; ++bigidx) {
if (strcmp(cleanlist[cleancnt - 1].word,biglist[bigidx].word) == 0)
continue;
cleanlist[cleancnt++] = biglist[bigidx];
}
free(biglist);
// trim the list
cleanlist = realloc(cleanlist,sizeof(wordsStruct) * cleancnt);
return 0;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.