[英]Why EXC_BAD_ACCESS (code=EXC_I386_GPFLT) when callback pointer to function?
The following code is trying to count word frequency in a document, by using hashset
and vector
.以下代码尝试使用hashset
和vector
计算文档中的词频。
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/** ==================================== VECTOR ======================================= */
typedef enum {
true, false
} bool;
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);
typedef struct {
int elemSize; //how many byte for each element
int elemNum; //number of current element in vector
int capacity; //maximum number of element vector can hold
void *elems; //pointer to data memory
VectorFreeFunction freefn; //pointer to the function used to free each element
} vector;
/**
* Reallocate a new memory of twice of original size
* return 1 if reallocation success, otherwise return -1.
*/
static void DoubleMemory(vector *v) {
void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
assert(tmp != NULL);
v->elems = tmp;
v->capacity *= 2;
}
/**
* Constructor
*/
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
v->elems = malloc(initialAllocation * elemSize);
assert(v->elems != NULL);
v->elemSize = elemSize;
v->elemNum = 0;
v->capacity = initialAllocation;
v->freefn = freefn;
}
/**
* Appends a new element to the end of the specified vector.
*/
void VectorAppend(vector *v, const void *elemAddr) {
/* double size if neccessary */
if (v->elemNum == v->capacity) DoubleMemory(v);
memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
v->elemNum++;
}
/**
* Search the specified vector for an element whose contents match the element passed as the key.
*/
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
assert(key && searchfn);
if (v->elemNum == 0) return -1;
assert(startIndex >= 0 && startIndex < v->elemNum);
if (isSorted == true) {
/* binary search */
void *startAddr = (char *)v->elems + startIndex * v->elemSize;
int size = v->elemNum - startIndex;
void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
} else {
/* linear search */
for (int i = 0; i < v->elemNum; i++) {
if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
return i;
}
}
return -1;
}
}
/**
* Overwrites the element at the specified position.
*/
void VectorReplace(vector *v, const void *elemAddr, int position) {
assert(position >= 0 && position < v->elemNum);
void *posAddr = (char *)v->elems + position * v->elemSize;
/* free the memory of old element first */
if (v->freefn != NULL) v->freefn(posAddr);
memcpy(posAddr, elemAddr, v->elemSize);
}
/** ==================================== HASHSET ======================================= */
typedef int (*HashSetHashFunction)(const void *elemAddr, int numBuckets);
typedef int (*HashSetCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*HashSetFreeFunction)(void *elemAddr);
typedef struct {
int elemNum; //current element number
int bucketNum; //number of hash bucket
int elemSize; //how many byte each element has
vector *buckets; //array of vector
HashSetHashFunction hashfn;
HashSetCompareFunction compfn;
HashSetFreeFunction freefn;
} hashset;
void HashSetNew(hashset *h, int elemSize, int numBuckets,
HashSetHashFunction hashfn, HashSetCompareFunction comparefn, HashSetFreeFunction freefn) {
assert(elemSize > 0 && numBuckets > 0 && hashfn != NULL && comparefn != NULL);
h->buckets = (vector *)malloc(numBuckets * sizeof(vector));
assert(h->buckets != NULL);
for (int i = 0; i < numBuckets; i++) {
vector *bucket = (vector *)((char *)h->buckets + i * sizeof(vector));
VectorNew(bucket, elemSize, freefn, 4);
}
h->bucketNum = numBuckets;
h->elemSize = elemSize;
h->elemNum = 0;
h->hashfn = hashfn;
h->compfn = comparefn;
h->freefn = freefn;
}
void HashSetEnter(hashset *h, const void *elemAddr) {
int hash = h->hashfn(elemAddr, h->bucketNum);
vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
// search in the hash set first
int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
if (pos != -1) {
// replace the old one if find a match
VectorReplace(bucket, elemAddr, pos);
} else {
// otherwise insert the new one
VectorAppend(bucket, elemAddr);
h->elemNum++;
}
}
/** ==================================== DOC_FREQ & WORD_INDEX ======================================= */
/****************************************************************
*
* doc_freq is a key-value pair of [documentid, frequency]
* It's not supposed to be exposed to user or search engine.
* -----------------------------------------------------------
* It looks like:
* [1611742826915764000] [4 ]
* |-------------------| |-------|
* docid freq
***************************************************************/
typedef struct {
long docid;
int freq;
} doc_freq;
static void new_docfreq(doc_freq *df, long docid, int freq) {
df->docid = docid;
df->freq = freq;
}
/**
* HashSetHashFunction<doc_freq>
*/
static int hash_docfreq(const void *elemAddr, int numBuckets) {
doc_freq *df = (doc_freq *)elemAddr;
return (int)(df->docid % numBuckets);
}
/**
* HashSetCompareFunction<doc_freq>
*/
static int comp_docfreq(const void *elemAddr1, const void *elemAddr2) {
long id1 = ((doc_freq *)elemAddr1)->docid;
long id2 = ((doc_freq *)elemAddr2)->docid;
if (id1 < id2) {
return -1;
} else if (id1 > id2) {
return 1;
} else { // id1 == id2
return 0;
}
}
/**
* word_index is a index of a single word.
* ---------------------------------------
* A typical word_index looks like:
* [apple]: [doc1, 5], [doc3, 10], [doc5, 7]
* |-----| |------------------------------|
* word freqs
*/
typedef struct {
char *word;
hashset *freqs; // hashset<doc_freq>
} word_index;
static const size_t kWordIndexHashSetBuckets = 64;
static void new_wordindex(word_index *wi, const char *word) {
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
size_t wordlen = strlen(word);
wi->word = (char *)malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '\0';
}
/**
* Mainly used to build a word_index.
*/
void add_docfreq(word_index *wi, const long docid, const int frequency) {
doc_freq df;
new_docfreq(&df, docid, frequency);
HashSetEnter(wi->freqs, &df);
}
/** ==================================== UNIT-TEST ======================================= */
int main(void) {
/* apple: [1611742826915764000, 5][1611742826915538000, 10] */
word_index *apple = (word_index *)malloc(sizeof(word_index));
new_wordindex(apple, "apple");
add_docfreq(apple, 1611742826915764000L, 5);
add_docfreq(apple, 1611742826915538000L, 10);
}
It gave me a segmentation fault
:它给了我一个segmentation fault
:
[1] 84309 segmentation fault testindexer
lldb find the problem occured when hashset
try to callback the given pointer of function hashfn
. lldb 发现当hashset
尝试回调 function hashfn
的给定指针时发生的问题。 I don't quite understand what is EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
here.我不太明白这里的EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
是什么。 I have done several unit test on hashset before, the HashSetEnter()
function worked well with hashfn
.我之前对 hashset 进行了几次单元测试, HashSetEnter()
function 与hashfn
配合得很好。 Another unit test was conducted on hash_docfreq()
function, it can also calculate correctly the hash number.又对hash_docfreq()
function 进行了单元测试,也能正确计算出 hash 数。 I'm a little bit confused.我有点困惑。 Anyone can help?任何人都可以帮忙吗? Thanks!谢谢!
Process 89962 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
127 }
128
129 void HashSetEnter(hashset *h, const void *elemAddr) {
-> 130 int hash = h->hashfn(elemAddr, h->bucketNum);
131 vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
132 // search in the hash set first
133 int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
Target 0: (testnothing) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
frame #1: 0x0000000100003c37 testnothing`add_docfreq(wi=0x0000000100306060, docid=1611742826915764000, frequency=5) at test_nothing.c:222:2
frame #2: 0x0000000100003cae testnothing`main at test_nothing.c:235:2
frame #3: 0x00007fff70df0cc9 libdyld.dylib`start + 1
(lldb)
Running under gdb
, after the fault, doing a tb
command to get a stack traceback, we see:在gdb
下运行,故障后,执行tb
命令获取堆栈回溯,我们看到:
#0 0x00000005004016e6 in ?? ()
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
#2 0x0000000000401834 in add_docfreq (wi=0x405260, docid=1611742826915764000,
frequency=5) at orig.c:266
#3 0x0000000000401879 in main () at orig.c:278
(gdb) frame 1
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
150 int hash = h->hashfn(elemAddr, h->bucketNum);
You are segfaulting in HashSetEnter
, at the line:您在HashSetEnter
中出现段错误,位于以下行:
int hash = h->hashfn(elemAddr, h->bucketNum);
This is because h
is not valid at this point.这是因为此时h
无效。
Examinining the source, the place that sets the value that is ultimately invalid, it is set in new_wordindex
.检查源,设置最终无效的值的地方,它设置在new_wordindex
中。
In new_wordindex
, you are saving [and returning] the address of h
.在new_wordindex
中,您正在保存 [并返回] h
的地址。
h
is a function scoped variable here, so it is no longer valid after the function returns. h
在这里是一个function作用域变量,所以在function返回后不再有效。
You have to use malloc
for this.为此,您必须使用malloc
。 And, later, you need to be able to free
this pointer during cleanup.而且,稍后,您需要能够在清理期间free
此指针。
Here's the refactored code for the incorrect function.这是不正确的 function 的重构代码。
Note that to show old/original code vs. new/corrected code, I'm using preprocessor conditionals:请注意,为了显示旧/原始代码与新/更正代码,我使用了预处理器条件:
#if 0
// old/original code
// NOTE: this is _not_ compiled in
#else
// new/corrected code
// NOTE: this _is_ compiled in
#endif
The code under #if 0
can be elided/removed, leaving just the #else
code. #if 0
下的代码可以省略/删除,只留下#else
代码。
static void
new_wordindex(word_index * wi, const char *word)
{
// NOTE/BUG: h is function scoped -- this can _not_ be saved and returned
// because it ceases to be valid when we return
#if 0
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
#else
hashset *h = malloc(sizeof(*h));
HashSetNew(h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = h;
#endif
size_t wordlen = strlen(word);
wi->word = (char *) malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '\0';
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.