为什么当回调指针指向 function 时出现 EXC_BAD_ACCESS (code=EXC_I386_GPFLT)？

Question

The following code is trying to count word frequency in a document, by using hashset and vector .以下代码尝试使用hashset和vector计算文档中的词频。

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>


/** ==================================== VECTOR ======================================= */

typedef enum {
    true, false
} bool;

typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);

typedef struct {
    int elemSize;               //how many byte for each element
    int elemNum;                //number of current element in vector
    int capacity;               //maximum number of element vector can hold
    void *elems;                //pointer to data memory
    VectorFreeFunction freefn;  //pointer to the function used to free each element
} vector;

/**
 * Reallocate a new memory of twice of original size
 * return 1 if reallocation success, otherwise return -1.
 */
static void DoubleMemory(vector *v) {
    void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
    assert(tmp != NULL);
    v->elems = tmp;
    v->capacity *= 2;
}

/**
 * Constructor
 */
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
    v->elems = malloc(initialAllocation * elemSize);
    assert(v->elems != NULL);
    v->elemSize = elemSize;
    v->elemNum = 0;
    v->capacity = initialAllocation;
    v->freefn = freefn;
}

/**
 * Appends a new element to the end of the specified vector.
 */
void VectorAppend(vector *v, const void *elemAddr) {
    /* double size if neccessary */
    if (v->elemNum == v->capacity) DoubleMemory(v);
    memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
    v->elemNum++;
}

/**
 * Search the specified vector for an element whose contents match the element passed as the key.
 */
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
    assert(key && searchfn);
    if (v->elemNum == 0) return -1;
    assert(startIndex >= 0 && startIndex < v->elemNum);
    if (isSorted == true) {
        /* binary search */
        void *startAddr = (char *)v->elems + startIndex * v->elemSize;
        int size = v->elemNum - startIndex;
        void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
        return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
    } else {
        /* linear search */
        for (int i = 0; i < v->elemNum; i++) {
            if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
                return i;
            }
        }
        return -1;
    }
}

/**
 * Overwrites the element at the specified position.
 */
void VectorReplace(vector *v, const void *elemAddr, int position) {
    assert(position >= 0 && position < v->elemNum);
    void *posAddr = (char *)v->elems + position * v->elemSize;
    /* free the memory of old element first */
    if (v->freefn != NULL) v->freefn(posAddr);
    memcpy(posAddr, elemAddr, v->elemSize);
}





/** ==================================== HASHSET ======================================= */

typedef int (*HashSetHashFunction)(const void *elemAddr, int numBuckets);
typedef int (*HashSetCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*HashSetFreeFunction)(void *elemAddr);

typedef struct {
    int elemNum;            //current element number
    int bucketNum;          //number of hash bucket
    int elemSize;           //how many byte each element has
    vector *buckets;        //array of vector
    HashSetHashFunction hashfn;
    HashSetCompareFunction compfn;
    HashSetFreeFunction freefn;
} hashset;

void HashSetNew(hashset *h, int elemSize, int numBuckets,   
        HashSetHashFunction hashfn, HashSetCompareFunction comparefn, HashSetFreeFunction freefn) {
    assert(elemSize > 0 && numBuckets > 0 && hashfn != NULL && comparefn != NULL);
    h->buckets = (vector *)malloc(numBuckets * sizeof(vector));
    assert(h->buckets != NULL);
    for (int i = 0; i < numBuckets; i++) {
        vector *bucket = (vector *)((char *)h->buckets + i * sizeof(vector));
        VectorNew(bucket, elemSize, freefn, 4);
    }
    h->bucketNum = numBuckets;
    h->elemSize = elemSize;
    h->elemNum = 0;
    h->hashfn = hashfn;
    h->compfn = comparefn;
    h->freefn = freefn;
}

void HashSetEnter(hashset *h, const void *elemAddr) {
    int hash = h->hashfn(elemAddr, h->bucketNum);
    vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
    // search in the hash set first
    int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
    if (pos != -1) {
        // replace the old one if find a match
        VectorReplace(bucket, elemAddr, pos);
    } else {
        // otherwise insert the new one
        VectorAppend(bucket, elemAddr);
        h->elemNum++;
    }
}




/** ==================================== DOC_FREQ & WORD_INDEX ======================================= */

/****************************************************************
 *
 * doc_freq is a key-value pair of [documentid, frequency]
 * It's not supposed to be exposed to user or search engine.
 * -----------------------------------------------------------
 * It looks like:
 *      [1611742826915764000]   [4      ]  
 *      |-------------------|   |-------|
 *       docid                   freq
 ***************************************************************/
typedef struct {
    long docid;
    int freq;
} doc_freq;

static void new_docfreq(doc_freq *df, long docid, int freq) {
    df->docid = docid;
    df->freq = freq;
}

/**
 * HashSetHashFunction<doc_freq>
 */
static int hash_docfreq(const void *elemAddr, int numBuckets) {
    doc_freq *df = (doc_freq *)elemAddr;
    return (int)(df->docid % numBuckets);
}

/**
 * HashSetCompareFunction<doc_freq>
 */
static int comp_docfreq(const void *elemAddr1, const void *elemAddr2) {
    long id1 = ((doc_freq *)elemAddr1)->docid;
    long id2 = ((doc_freq *)elemAddr2)->docid;
    if (id1 < id2) {
        return -1;
    } else if (id1 > id2) {
        return 1;
    } else { // id1 == id2
        return 0;
    }
}

/**
 * word_index is a index of a single word.
 * ---------------------------------------
 * A typical word_index looks like:
 *      [apple]: [doc1, 5], [doc3, 10], [doc5, 7]
 *      |-----|  |------------------------------|
 *       word     freqs
 */
typedef struct {
    char *word;
    hashset *freqs; // hashset<doc_freq>
} word_index;

static const size_t kWordIndexHashSetBuckets = 64;
static void new_wordindex(word_index *wi, const char *word) {
    hashset h;
    HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = &h;
    size_t wordlen = strlen(word);
    wi->word = (char *)malloc(wordlen + 1); // +1 for null-termination
    strcpy(wi->word, word);
    (wi->word)[wordlen] = '\0';
}

/**
 * Mainly used to build a word_index.
 */
void add_docfreq(word_index *wi, const long docid, const int frequency) {
    doc_freq df;
    new_docfreq(&df, docid, frequency);
    HashSetEnter(wi->freqs, &df);
}





/** ==================================== UNIT-TEST ======================================= */

int main(void) {
    /* apple:   [1611742826915764000, 5][1611742826915538000, 10] */
    word_index *apple = (word_index *)malloc(sizeof(word_index));
    new_wordindex(apple, "apple");
    add_docfreq(apple, 1611742826915764000L, 5);
    add_docfreq(apple, 1611742826915538000L, 10);
}

It gave me a segmentation fault :它给了我一个segmentation fault ：

[1]    84309 segmentation fault  testindexer

lldb find the problem occured when hashset try to callback the given pointer of function hashfn . lldb 发现当hashset尝试回调 function hashfn的给定指针时发生的问题。 I don't quite understand what is EXC_BAD_ACCESS (code=EXC_I386_GPFLT) here.我不太明白这里的EXC_BAD_ACCESS (code=EXC_I386_GPFLT)是什么。 I have done several unit test on hashset before, the HashSetEnter() function worked well with hashfn .我之前对 hashset 进行了几次单元测试， HashSetEnter() function 与hashfn配合得很好。 Another unit test was conducted on hash_docfreq() function, it can also calculate correctly the hash number.又对hash_docfreq() function 进行了单元测试，也能正确计算出 hash 数。 I'm a little bit confused.我有点困惑。 Anyone can help?任何人都可以帮忙吗？ Thanks!谢谢！

Process 89962 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
    frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
   127  }
   128
   129  void HashSetEnter(hashset *h, const void *elemAddr) {
-> 130      int hash = h->hashfn(elemAddr, h->bucketNum);
   131      vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
   132      // search in the hash set first
   133      int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
Target 0: (testnothing) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
  * frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
    frame #1: 0x0000000100003c37 testnothing`add_docfreq(wi=0x0000000100306060, docid=1611742826915764000, frequency=5) at test_nothing.c:222:2
    frame #2: 0x0000000100003cae testnothing`main at test_nothing.c:235:2
    frame #3: 0x00007fff70df0cc9 libdyld.dylib`start + 1
(lldb)

Answer 1

Running under gdb , after the fault, doing a tb command to get a stack traceback, we see:在gdb下运行，故障后，执行tb命令获取堆栈回溯，我们看到：

#0  0x00000005004016e6 in ?? ()
#1  0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
    elemAddr=0x7fffffffdc40) at orig.c:150
#2  0x0000000000401834 in add_docfreq (wi=0x405260, docid=1611742826915764000,
    frequency=5) at orig.c:266
#3  0x0000000000401879 in main () at orig.c:278
(gdb) frame 1
#1  0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
    elemAddr=0x7fffffffdc40) at orig.c:150
150     int hash = h->hashfn(elemAddr, h->bucketNum);

You are segfaulting in HashSetEnter , at the line:您在HashSetEnter中出现段错误，位于以下行：

int hash = h->hashfn(elemAddr, h->bucketNum);

This is because h is not valid at this point.这是因为此时h无效。

Examinining the source, the place that sets the value that is ultimately invalid, it is set in new_wordindex .检查源，设置最终无效的值的地方，它设置在new_wordindex中。

In new_wordindex , you are saving [and returning] the address of h .在new_wordindex中，您正在保存 [并返回] h的地址。

h is a function scoped variable here, so it is no longer valid after the function returns. h在这里是一个function作用域变量，所以在function返回后不再有效。

You have to use malloc for this.为此，您必须使用malloc 。 And, later, you need to be able to free this pointer during cleanup.而且，稍后，您需要能够在清理期间free此指针。

Here's the refactored code for the incorrect function.这是不正确的 function 的重构代码。

Note that to show old/original code vs. new/corrected code, I'm using preprocessor conditionals:请注意，为了显示旧/原始代码与新/更正代码，我使用了预处理器条件：

#if 0
// old/original code
// NOTE: this is _not_ compiled in
#else
// new/corrected code
// NOTE: this _is_ compiled in
#endif

The code under #if 0 can be elided/removed, leaving just the #else code. #if 0下的代码可以省略/删除，只留下#else代码。

static void
new_wordindex(word_index * wi, const char *word)
{
// NOTE/BUG: h is function scoped -- this can _not_ be saved and returned
// because it ceases to be valid when we return
#if 0
    hashset h;
    HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = &h;
#else
    hashset *h = malloc(sizeof(*h));
    HashSetNew(h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = h;
#endif

    size_t wordlen = strlen(word);

    wi->word = (char *) malloc(wordlen + 1);    // +1 for null-termination
    strcpy(wi->word, word);
    (wi->word)[wordlen] = '\0';
}

为什么当回调指针指向 function 时出现 EXC_BAD_ACCESS (code=EXC_I386_GPFLT)？

问题描述

1 个解决方案

解决方案1
2 已采纳 2021-02-12 02:13:02

为什么当回调指针指向 function 时出现 EXC_BAD_ACCESS (code=EXC_I386_GPFLT)？

问题描述

1 个解决方案

解决方案1 2 已采纳 2021-02-12 02:13:02

解决方案1
2 已采纳 2021-02-12 02:13:02