简体   繁体   English

使用ctypes时出现分段错误

[英]segmentation fault when using ctypes

I keep getting a segmentation fault when calling C++ from Python using ctypes. 使用ctypes从python调用c ++时,我一直遇到分段错误。 I attached gdb to the C++ code and made sure that the C++ code works well. 我已将gdb附加到c ++代码,并确保C ++代码能正常工作。 The segmentation fault is raised after the C++ code has computed the correct return value and before control returns to the Python code. 在c ++代码具有正确的返回值之后以及返回python代码之前,引发了分段错误。 I checked everything else that I guessed might be a mistake, especially the in/out parameters, but still couldn't get any clue as to what is wrong. 我检查了所有其他东西,我猜可能是错误的,尤其是in / out参数,但仍然无法获得错误之处的任何线索。 Any help would be appreciated. 任何帮助,将不胜感激。 Here is the C++ code: 这是c ++代码:

string_utils.cpp string_utils.cpp

#include <iostream>
#include <algorithm>
#include <tuple>
#include <vector>
#include <string>
#include <exception>
#include <unistd.h>
#include <stdbool.h>

using namespace std;

/**
 * One node of a character trie stored in first-child / next-sibling form.
 *
 * A node owns its whole child list: the destructor deletes every child,
 * and each child in turn deletes its own children. Because of that
 * ownership the node is non-copyable; an implicit member-wise copy would
 * leave two nodes deleting the same children.
 */
class SimpleTrieNode {
public:
    char key = '\0';                           // character stored in this node
    bool end = false;                          // flag set by the trie owner on the last node of an inserted string
    unsigned int num_children = 0;             // number of nodes in the child list
    SimpleTrieNode* p_next_sibling = nullptr;  // next node in the parent's child list
    SimpleTrieNode* p_first_child = nullptr;   // head of this node's child list

public:
    SimpleTrieNode() = default;

    // Copying would duplicate the owning raw pointers and double-delete
    // the children, so it is disabled.
    SimpleTrieNode(const SimpleTrieNode&) = delete;
    SimpleTrieNode& operator=(const SimpleTrieNode&) = delete;

    /**
     * Returns the child whose key equals `key`, creating it and appending
     * it to the end of the child list when no such child exists yet.
     */
    SimpleTrieNode* add_child(char key) {
        // Walk the links rather than the nodes so that appending to an
        // empty list and to a non-empty list are the same operation.
        SimpleTrieNode** link = &p_first_child;
        while (*link != nullptr) {
            if ((*link)->key == key) {
                return *link;
            }
            link = &(*link)->p_next_sibling;
        }
        *link = new SimpleTrieNode();
        (*link)->key = key;
        num_children++;
        return *link;
    }

    /**
     * Returns the child whose key equals `key`, or nullptr when this node
     * has no such child.
     */
    SimpleTrieNode* get_child(char key) const {
        for (SimpleTrieNode* p = p_first_child; p != nullptr;
                p = p->p_next_sibling) {
            if (p->key == key) {
                return p;
            }
        }
        return nullptr;
    }

    /** Deletes every node in the child list. */
    ~SimpleTrieNode() {
        SimpleTrieNode* p = p_first_child;
        while (p != nullptr) {
            // Read the sibling link before the node is destroyed.
            SimpleTrieNode* p_next = p->p_next_sibling;
            delete p;
            p = p_next;
        }
    }
};

/**
 * A trie of byte strings built from SimpleTrieNode.
 *
 * `root` is a sentinel node that carries no character; `max_len` is the
 * length of the longest string passed to add() so far.
 */
class SimpleTrie {
public:
    SimpleTrieNode root;
    unsigned int max_len = 0;

public:
    /**
     * Inserts the NUL-terminated string `d` and marks its last node as a
     * string end.
     *
     * @param d  NUL-terminated string to insert; a null pointer is ignored.
     * @return   the node of the last character (the root for an empty
     *           string), or nullptr when `d` is null.
     */
    SimpleTrieNode* add(char* d) {
        if (d == nullptr) {
            // Constructing a std::string from a null pointer is undefined.
            return nullptr;
        }
        const std::string data(d);
        if (max_len < data.length()) {
            max_len = static_cast<unsigned int>(data.length());
        }
        SimpleTrieNode* p = &root;
        for (char c : data) {
            p = p->add_child(c);
        }
        p->end = true;
        return p;
    }

    /**
     * Reports whether `q` was inserted as a complete string.
     *
     * The walk stops as soon as a character is missing (including the last
     * one), and a path that only forms a prefix of a longer inserted
     * string does not count because its final node is not marked `end`.
     */
    bool exist(const std::string& q) {
        SimpleTrieNode* p = &root;
        for (char c : q) {
            p = p->get_child(c);
            if (p == nullptr) {
                return false;
            }
        }
        return p->end;
    }
};

/**
 * Maps cell (i, j) of an x-by-y matrix to its offset in a flat row-major
 * array of x * y elements.
 *
 * @param i  row index, expected in [0, x)
 * @param j  column index, expected in [0, y)
 * @param x  number of rows (kept for the callers' signature; the offset
 *           itself does not depend on it)
 * @param y  number of columns, i.e. the length of one row
 * @return   i * y + j
 */
unsigned int convert_2d_index_to_1d(unsigned int i, unsigned int j,
        unsigned int x, unsigned int y) {
    (void)x;
    // The row stride is the number of columns. Using the number of rows
    // here sends cells outside the x * y array whenever x > y.
    return i * y + j;
}

_Bool compute_edit_distance_by_diagonal(SimpleTrieNode const * const p_node,
        string q, unsigned int len_x, unsigned int len_y, unsigned int i,
        unsigned int j, unsigned int threshold, unsigned int matrix[],
        char matched_seq[]) {
    unsigned int k = j;
    unsigned int min_row_value = threshold + 1;
    matched_seq[i - 1] = p_node->key;
    matched_seq[i]='\0';
    while ((j - i <= threshold || j < i) && j <= q.length()) {
        unsigned int min_dist = min(
                min(
                        matrix[convert_2d_index_to_1d(i - 1, j, len_x + 1,
                                len_y + 1)] + 1,
                        matrix[convert_2d_index_to_1d(i, j - 1, len_x + 1,
                                len_y + 1)] + 1),
                p_node->key == q[j - 1] ?
                        matrix[convert_2d_index_to_1d(i - 1, j - 1, len_x + 1,
                                len_y + 1)] :
                        matrix[convert_2d_index_to_1d(i - 1, j - 1, len_x + 1,
                                len_y + 1)] + 1);
        min_row_value = min(min_dist, min_row_value);
        matrix[convert_2d_index_to_1d(i, j, len_x + 1, len_y + 1)] = min_dist;
        if (p_node->end && q.length() == j && min_dist <= threshold) {
            return true;
        }
        j++;
    }

    if (min_row_value > threshold || i >= threshold + len_y) {
        return false;
    }
    SimpleTrieNode *child = p_node->p_first_child;
    k = i < threshold + 1 ? k : k + 1;
    while (child != nullptr) {
        _Bool matched = compute_edit_distance_by_diagonal(child, q, len_x, len_y,
                i + 1, k, threshold, matrix, matched_seq);
        if (matched) {
            return matched;
        }
        child = child->p_next_sibling;
    }
    return false;
}

_Bool compute_edit_distance_by_diagonal(SimpleTrieNode const * const p_node,
        string q, unsigned int len_x, unsigned int len_y, unsigned int i,
        unsigned int j, unsigned int threshold, char matched_seq[]) {
    unsigned int size = (len_x + 1) * (len_y + 1);
    unsigned int matrix[size];
    fill_n(matrix, size, threshold+1);
    for (unsigned int ii = 0; ii <= threshold && ii < len_x + 1; ii++) {
        matrix[convert_2d_index_to_1d(ii, 0, len_x + 1, len_y + 1)] = ii;
    }
    for (unsigned int jj = 0; jj <= threshold && jj < len_y + 1; jj++) {
        matrix[convert_2d_index_to_1d(0, jj, len_x + 1, len_y + 1)] = jj;
    }
    return compute_edit_distance_by_diagonal(p_node, q, len_x, len_y, i, j,
            threshold, matrix, matched_seq);
}

_Bool approximate_string_match(const SimpleTrie* p_trie, char* q,
        unsigned int threshold, char matched_seq[]) {
    string query(q);
    unsigned int qlen = query.length();

    SimpleTrieNode* child = p_trie->root.p_first_child;
    while (child != nullptr) {
        _Bool m = compute_edit_distance_by_diagonal(child, query,
                p_trie->max_len, qlen, 1, 1, threshold, matched_seq);
        if (m) {
            return m;
        }
        child = child->p_next_sibling;
    }
    return false;
}

extern "C" {
    SimpleTrie* SimpleTrie_Initialization() {
        SimpleTrie *p = new SimpleTrie();
        return p;
    }
    void SimpleTrie_Destruction(SimpleTrie *p) {
        delete p;
    }
    void SimpleTrie_Add(SimpleTrie *p,char *q) {
        p->add(q);
    }
    _Bool SimpleTrie_Approximate_string_match(SimpleTrie* p_trie, char *q,
            unsigned int threshold, char matched_seq[]) {
        return approximate_string_match(p_trie, q, threshold, matched_seq);
    }
}

Here is the makefile: 这是makefile:

# Builds the shared library that string_utils.py loads via ctypes.
# The rule target is the file the recipe actually produces, so make can
# skip the build when the library is newer than the source.
.PHONY: all
all: string_utils.1.0.so

string_utils.1.0.so: string_utils.cpp
	# -Wall belongs on the compile step; it has no effect when only linking.
	g++ -Wall -c -fPIC string_utils.cpp -o string_utils.o
	g++ -shared -o string_utils.1.0.so string_utils.o
	rm string_utils.o

Here is the python code: 这是python代码:

string_utils.py string_utils.py

from ctypes import *
import os
import codecs


# The shared library is expected in the same directory as this module.
lib_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        'string_utils.1.0.so')
lib = cdll.LoadLibrary(lib_path)

# Prototypes of the exported C functions. The trie handle is passed around
# as an opaque pointer (c_void_p).

# SimpleTrie_Initialization() -> handle
lib.SimpleTrie_Initialization.restype = c_void_p
lib.SimpleTrie_Initialization.argtypes = []

# SimpleTrie_Destruction(handle)
lib.SimpleTrie_Destruction.restype = None
lib.SimpleTrie_Destruction.argtypes = [c_void_p]

# SimpleTrie_Add(handle, string)
lib.SimpleTrie_Add.restype = None
lib.SimpleTrie_Add.argtypes = [c_void_p, c_char_p]

# SimpleTrie_Approximate_string_match(handle, query, threshold, out_buffer) -> bool
lib.SimpleTrie_Approximate_string_match.restype = c_bool
lib.SimpleTrie_Approximate_string_match.argtypes = [c_void_p, c_char_p,
                                                    c_uint, c_char_p]


class SimpleTrie(object):
    """Python wrapper around the native trie exposed by the shared library.

    The instance holds an opaque handle in ``self.obj`` that is created in
    ``__init__`` and released in ``__del__``.
    """
    initialized = False

    def __init__(self):
        self.obj = lib.SimpleTrie_Initialization()

    def __del__(self):
        # __del__ also runs when __init__ raised before `obj` was assigned,
        # so look the handle up defensively and release it at most once.
        handle = getattr(self, 'obj', None)
        if handle is not None:
            self.obj = None
            lib.SimpleTrie_Destruction(handle)

    def add(self, q):
        """Insert the text ``q`` into the trie as UTF-8 bytes."""
        lib.SimpleTrie_Add(self.obj, q.encode(encoding='utf-8'))

    def approximate_string_match(self, q, threshold):
        """Search for a stored string matching ``q`` within ``threshold``.

        Returns a tuple ``(matched, text)``: ``matched`` is the boolean
        returned by the native call and ``text`` is the content of the
        output buffer decoded as UTF-8.
        """
        bs = q.encode(encoding='utf-8')
        # The native code writes one byte per visited trie level plus a
        # terminating NUL into this buffer.
        matched_seq = create_string_buffer(len(bs) + threshold + 1)
        m = lib.SimpleTrie_Approximate_string_match(self.obj, bs,
                                                    threshold, matched_seq)
        # The trie works on bytes, so after an unsuccessful search the
        # buffer may end in the middle of a multi-byte character; decode
        # leniently instead of raising UnicodeDecodeError.
        return m, matched_seq.value.decode('utf-8', errors='replace')


if __name__ == '__main__':
    # Small demo: fill a trie with sample strings and run one
    # approximate lookup with a threshold of 6.
    trie = SimpleTrie()
    for word in ('abderqwerwqerqweefg',
                 'dfaewqrwqerwqerqwerqwerdfa',
                 'afdfertewtertetrewqrwqrffg',
                 'fgfdhgadsfsadfsadfadsffdhdf',
                 'fgfdhgadsfsadjhgfdfadsffdhdf',
                 'antihsadsfasdfaddafsadfsadsfasdaive',
                 'dsgffdshgdsgffdadsfsadfsadfsfdasdfasdfasdfasdfsg'):
        trie.add(word)
    matched, _text = trie.approximate_string_match(
        u"antihsadsfasdfadsfasdaive", 6)
    print(matched)

Thanks in advance! 提前致谢!

The problem is not related to ctypes. 该问题与ctypes无关。 The C++ code is a mess. cpp中一团糟。 This 这个

unsigned int matrix[size];

causes a stack overflow… welcome. 引起stackoverflow ...欢迎。 You are trying to put a large amount of data on the stack. 您正在尝试将大部分数据放入堆栈。 You should use std::vector<unsigned>, std::unique_ptr<unsigned[]>, or other heap or static storage. 您应该使用std::vector<unsigned>、std::unique_ptr<unsigned[]>或其他堆或静态存储。 By the way, using a non-compile-time constant as an array size is not portable. 顺便说一句,使用非编译时常量作为数组大小不是可移植的。

Once you correct the memory allocation, it will fail somewhere further down in the recursion, because you pass strings by copying. 然后,您更正了内存分配,由于您通过复制传递了字符串,因此它将在递归的其他地方失败。 You should probably use const string& q instead of string q. 您可能应该使用const string& q而不是string q。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM