简体   繁体   English

为什么当回调指针指向 function 时出现 EXC_BAD_ACCESS (code=EXC_I386_GPFLT)?

[英]Why EXC_BAD_ACCESS (code=EXC_I386_GPFLT) when callback pointer to function?

The following code is trying to count word frequency in a document, by using hashset and vector .以下代码尝试使用hashsetvector计算文档中的词频。

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>


/** ==================================== VECTOR ======================================= */

typedef enum {
    true, false
} bool;

typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);

typedef struct {
    int elemSize;               //how many byte for each element
    int elemNum;                //number of current element in vector
    int capacity;               //maximum number of element vector can hold
    void *elems;                //pointer to data memory
    VectorFreeFunction freefn;  //pointer to the function used to free each element
} vector;

/**
 * Reallocate a new memory of twice of original size
 * return 1 if reallocation success, otherwise return -1.
 */
static void DoubleMemory(vector *v) {
    void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
    assert(tmp != NULL);
    v->elems = tmp;
    v->capacity *= 2;
}

/**
 * Constructor
 */
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
    v->elems = malloc(initialAllocation * elemSize);
    assert(v->elems != NULL);
    v->elemSize = elemSize;
    v->elemNum = 0;
    v->capacity = initialAllocation;
    v->freefn = freefn;
}

/**
 * Appends a new element to the end of the specified vector.
 */
void VectorAppend(vector *v, const void *elemAddr) {
    /* double size if neccessary */
    if (v->elemNum == v->capacity) DoubleMemory(v);
    memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
    v->elemNum++;
}

/**
 * Search the specified vector for an element whose contents match the element passed as the key.
 */
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
    assert(key && searchfn);
    if (v->elemNum == 0) return -1;
    assert(startIndex >= 0 && startIndex < v->elemNum);
    if (isSorted == true) {
        /* binary search */
        void *startAddr = (char *)v->elems + startIndex * v->elemSize;
        int size = v->elemNum - startIndex;
        void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
        return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
    } else {
        /* linear search */
        for (int i = 0; i < v->elemNum; i++) {
            if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
                return i;
            }
        }
        return -1;
    }
}

/**
 * Overwrites the element at the specified position.
 */
void VectorReplace(vector *v, const void *elemAddr, int position) {
    assert(position >= 0 && position < v->elemNum);
    void *posAddr = (char *)v->elems + position * v->elemSize;
    /* free the memory of old element first */
    if (v->freefn != NULL) v->freefn(posAddr);
    memcpy(posAddr, elemAddr, v->elemSize);
}





/** ==================================== HASHSET ======================================= */

typedef int (*HashSetHashFunction)(const void *elemAddr, int numBuckets);
typedef int (*HashSetCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*HashSetFreeFunction)(void *elemAddr);

typedef struct {
    int elemNum;            //current element number
    int bucketNum;          //number of hash bucket
    int elemSize;           //how many byte each element has
    vector *buckets;        //array of vector
    HashSetHashFunction hashfn;
    HashSetCompareFunction compfn;
    HashSetFreeFunction freefn;
} hashset;

void HashSetNew(hashset *h, int elemSize, int numBuckets,   
        HashSetHashFunction hashfn, HashSetCompareFunction comparefn, HashSetFreeFunction freefn) {
    assert(elemSize > 0 && numBuckets > 0 && hashfn != NULL && comparefn != NULL);
    h->buckets = (vector *)malloc(numBuckets * sizeof(vector));
    assert(h->buckets != NULL);
    for (int i = 0; i < numBuckets; i++) {
        vector *bucket = (vector *)((char *)h->buckets + i * sizeof(vector));
        VectorNew(bucket, elemSize, freefn, 4);
    }
    h->bucketNum = numBuckets;
    h->elemSize = elemSize;
    h->elemNum = 0;
    h->hashfn = hashfn;
    h->compfn = comparefn;
    h->freefn = freefn;
}

void HashSetEnter(hashset *h, const void *elemAddr) {
    int hash = h->hashfn(elemAddr, h->bucketNum);
    vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
    // search in the hash set first
    int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
    if (pos != -1) {
        // replace the old one if find a match
        VectorReplace(bucket, elemAddr, pos);
    } else {
        // otherwise insert the new one
        VectorAppend(bucket, elemAddr);
        h->elemNum++;
    }
}




/** ==================================== DOC_FREQ & WORD_INDEX ======================================= */

/****************************************************************
 *
 * doc_freq is a key-value pair of [documentid, frequency]
 * It's not supposed to be exposed to user or search engine.
 * -----------------------------------------------------------
 * It looks like:
 *      [1611742826915764000]   [4      ]  
 *      |-------------------|   |-------|
 *       docid                   freq
 ***************************************************************/
typedef struct {
    long docid;
    int freq;
} doc_freq;

static void new_docfreq(doc_freq *df, long docid, int freq) {
    df->docid = docid;
    df->freq = freq;
}

/**
 * HashSetHashFunction<doc_freq>
 */
static int hash_docfreq(const void *elemAddr, int numBuckets) {
    doc_freq *df = (doc_freq *)elemAddr;
    return (int)(df->docid % numBuckets);
}

/**
 * HashSetCompareFunction<doc_freq>
 */
static int comp_docfreq(const void *elemAddr1, const void *elemAddr2) {
    long id1 = ((doc_freq *)elemAddr1)->docid;
    long id2 = ((doc_freq *)elemAddr2)->docid;
    if (id1 < id2) {
        return -1;
    } else if (id1 > id2) {
        return 1;
    } else { // id1 == id2
        return 0;
    }
}

/**
 * word_index is a index of a single word.
 * ---------------------------------------
 * A typical word_index looks like:
 *      [apple]: [doc1, 5], [doc3, 10], [doc5, 7]
 *      |-----|  |------------------------------|
 *       word     freqs
 */
typedef struct {
    char *word;
    hashset *freqs; // hashset<doc_freq>
} word_index;

static const size_t kWordIndexHashSetBuckets = 64;
static void new_wordindex(word_index *wi, const char *word) {
    hashset h;
    HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = &h;
    size_t wordlen = strlen(word);
    wi->word = (char *)malloc(wordlen + 1); // +1 for null-termination
    strcpy(wi->word, word);
    (wi->word)[wordlen] = '\0';
}

/**
 * Mainly used to build a word_index.
 */
void add_docfreq(word_index *wi, const long docid, const int frequency) {
    doc_freq df;
    new_docfreq(&df, docid, frequency);
    HashSetEnter(wi->freqs, &df);
}





/** ==================================== UNIT-TEST ======================================= */

int main(void) {
    /* apple:   [1611742826915764000, 5][1611742826915538000, 10] */
    word_index *apple = (word_index *)malloc(sizeof(word_index));
    new_wordindex(apple, "apple");
    add_docfreq(apple, 1611742826915764000L, 5);
    add_docfreq(apple, 1611742826915538000L, 10);
}

It gave me a segmentation fault :它给了我一个segmentation fault

[1]    84309 segmentation fault  testindexer

lldb find the problem occured when hashset try to callback the given pointer of function hashfn . lldb 发现当hashset尝试回调 function hashfn的给定指针时发生的问题。 I don't quite understand what is EXC_BAD_ACCESS (code=EXC_I386_GPFLT) here.我不太明白这里的EXC_BAD_ACCESS (code=EXC_I386_GPFLT)是什么。 I have done several unit test on hashset before, the HashSetEnter() function worked well with hashfn .我之前对 hashset 进行了几次单元测试, HashSetEnter() function 与hashfn配合得很好。 Another unit test was conducted on hash_docfreq() function, it can also calculate correctly the hash number.又对hash_docfreq() function 进行了单元测试,也能正确计算出 hash 数。 I'm a little bit confused.我有点困惑。 Anyone can help?任何人都可以帮忙吗? Thanks!谢谢!

Process 89962 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
    frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
   127  }
   128
   129  void HashSetEnter(hashset *h, const void *elemAddr) {
-> 130      int hash = h->hashfn(elemAddr, h->bucketNum);
   131      vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
   132      // search in the hash set first
   133      int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
Target 0: (testnothing) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
  * frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
    frame #1: 0x0000000100003c37 testnothing`add_docfreq(wi=0x0000000100306060, docid=1611742826915764000, frequency=5) at test_nothing.c:222:2
    frame #2: 0x0000000100003cae testnothing`main at test_nothing.c:235:2
    frame #3: 0x00007fff70df0cc9 libdyld.dylib`start + 1
(lldb)

Running under gdb , after the fault, doing a tb command to get a stack traceback, we see:gdb下运行,故障后,执行tb命令获取堆栈回溯,我们看到:

#0  0x00000005004016e6 in ?? ()
#1  0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
    elemAddr=0x7fffffffdc40) at orig.c:150
#2  0x0000000000401834 in add_docfreq (wi=0x405260, docid=1611742826915764000,
    frequency=5) at orig.c:266
#3  0x0000000000401879 in main () at orig.c:278
(gdb) frame 1
#1  0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
    elemAddr=0x7fffffffdc40) at orig.c:150
150     int hash = h->hashfn(elemAddr, h->bucketNum);

You are segfaulting in HashSetEnter , at the line:您在HashSetEnter中出现段错误,位于以下行:

int hash = h->hashfn(elemAddr, h->bucketNum);

This is because h is not valid at this point.这是因为此时h无效


Examinining the source, the place that sets the value that is ultimately invalid, it is set in new_wordindex .检查源,设置最终无效的值的地方,它设置在new_wordindex中。

In new_wordindex , you are saving [and returning] the address of h .new_wordindex中,您正在保存 [并返回] h的地址。

h is a function scoped variable here, so it is no longer valid after the function returns. h在这里是一个function作用域变量,所以在function返回后不再有效。

You have to use malloc for this.为此,您必须使用malloc And, later, you need to be able to free this pointer during cleanup.而且,稍后,您需要能够在清理期间free此指针。


Here's the refactored code for the incorrect function.这是不正确的 function 的重构代码。

Note that to show old/original code vs. new/corrected code, I'm using preprocessor conditionals:请注意,为了显示旧/原始代码与新/更正代码,我使用了预处理器条件:

#if 0
// old/original code
// NOTE: this is _not_ compiled in
#else
// new/corrected code
// NOTE: this _is_ compiled in
#endif

The code under #if 0 can be elided/removed, leaving just the #else code. #if 0下的代码可以省略/删除,只留下#else代码。

static void
new_wordindex(word_index * wi, const char *word)
{
// NOTE/BUG: h is function scoped -- this can _not_ be saved and returned
// because it ceases to be valid when we return
#if 0
    hashset h;
    HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = &h;
#else
    hashset *h = malloc(sizeof(*h));
    HashSetNew(h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = h;
#endif

    size_t wordlen = strlen(word);

    wi->word = (char *) malloc(wordlen + 1);    // +1 for null-termination
    strcpy(wi->word, word);
    (wi->word)[wordlen] = '\0';
}

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM