[英]How to port a specific C module to Python 3?
有一个名为 thinning 的 pip 包,目前只能用 Python 2 编译。
当我使用sudo pip install thinning
安装它然后尝试import thinning
,我收到一个错误:
ImportError: /usr/lib/python3.5/site-packages/thinning.cpython-35m-x86_64-linux-gnu.so: undefined symbol: Py_InitModule3
我猜这是因为 Python 3 已经不再提供 Py_InitModule3。 这是完整的 C 源文件:
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "Python.h"
#include "arrayobject.h"
#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
static PyObject *guo_hall_thinning(PyObject *self, PyObject *args);
int _guo_hall_thinning(unsigned char* binary_image, int width, int height);
void initthinning(void);
/* ==== Set up the methods table ====================== */
static PyMethodDef thinningMethods[] = {
{"guo_hall_thinning",guo_hall_thinning, METH_VARARGS,
"Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall."
"Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format."
"\n\n"
"We assume that the dimensions of the image fit into an int on your platform. If your computer for some"
"reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen."
"\n\n"
"interface:\n"
"\tguo_hall_thinning(segmented_image)"
"\tsegmented_image is a NumPy matrix,"
"\treturns the same NumPy matrix (thinned)"},
{NULL, NULL, 0, NULL} /* Sentinel - marks the end of this structure */
};
/* ==== Initialize the C_test functions ====================== */
void initthinning() {
PyObject* module = Py_InitModule3("thinning",thinningMethods, "Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning.");
PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann@gmx.de>");
PyModule_AddStringConstant(module, "__version__", "1.2.3");
import_array(); // Must be present for NumPy. Called first after above line.
}
/* ==== Guo Hall Thinning =========
Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall.
Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format.
We assume that the dimensions of the image fit into an int on your platform. If your computer for some
reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen.
interface: guo_hall_thinning(segmented_image)
segmented_image is a NumPy matrix,
returns the same NumPy matrix (thinned)
*/
static PyObject *guo_hall_thinning(PyObject *self, PyObject *args)
{
    /* Python entry point.
       Expects exactly one argument: a 2-D NumPy array of type NPY_UBYTE that
       satisfies NPY_ARRAY_CARRAY. The array is thinned in place and a new
       reference to that same array is returned.
       Returns NULL with a Python exception set when the argument is rejected,
       when the image is too large for the int-based worker, or when the
       worker cannot allocate its mask. */
    PyArrayObject *segmented_image = NULL;
    (void)self; /* unused */

    /* Parse tuples separately since args will differ between C fcns */
    if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &segmented_image)) {
        return NULL;
    }
    if (NULL == segmented_image) {
        PyErr_SetString(PyExc_TypeError, "Parameter is not a valid image");
        return NULL;
    }
    /* The number of dimensions must be checked before shape[1] is read:
       a 1-D (or 0-D) array has no second dimension entry. */
    if (PyArray_TYPE(segmented_image) != NPY_UBYTE
        || !PyArray_CHKFLAGS(segmented_image, NPY_ARRAY_CARRAY)
        || PyArray_NDIM(segmented_image) != 2) {
        PyErr_SetString(PyExc_TypeError, "Parameter is not a grayscale image");
        return NULL;
    }

    const npy_intp *shape = PyArray_DIMS(segmented_image);
    const npy_intp rows = shape[0];
    const npy_intp cols = shape[1];

    /* Images with fewer than 3 rows or columns are returned untouched. */
    if (rows >= 3 && cols >= 3) {
        /* The worker indexes with int and computes width*height as an int,
           so refuse images whose pixel count does not fit. With both
           dimensions >= 3 this also guarantees each one fits in an int. */
        if (rows > INT_MAX / cols) {
            PyErr_SetString(PyExc_ValueError, "Image is too large");
            return NULL;
        }
        unsigned char *in_data = PyArray_DATA(segmented_image);
        int ok = _guo_hall_thinning(in_data, (int)cols, (int)rows);
        if (ok < 0) {
            return PyErr_NoMemory();
        }
    }

    /* The caller receives its own reference to the input array. */
    Py_INCREF(segmented_image);
    return (PyObject*)segmented_image;
}
int nonzero_clever(const unsigned char* arr, unsigned int start, unsigned int len) {
    /* Find the first nonzero byte of arr at an index in [start, len).
       Note that len is an (exclusive) end index, not an element count.
       Returns that index, or len when every byte in the range is zero
       (including when start >= len).

       To be faster on 64 bit cpus, runs of zeros are skipped one
       unsigned long long at a time. The word is loaded with memcpy rather
       than through a casted pointer, so the read is valid for any alignment
       of arr + i. */
    const unsigned int step = sizeof(unsigned long long);
    unsigned int i = start;

    /* Word-wise skip; the len > step guard keeps len - step from wrapping. */
    while (len > step && i < len - step) {
        unsigned long long word;
        memcpy(&word, arr + i, sizeof word);
        if (word != 0) {
            break; /* the first nonzero byte lies within this word */
        }
        i += step;
    }

    /* Byte-wise scan: locates the hit inside the word found above, or
       covers the remaining tail of the range. */
    while (i < len) {
        if (arr[i] != 0) {
            return (int)i;
        }
        i++;
    }
    return (int)len;
}
int guo_hall_iteration(const unsigned char* binary_image, unsigned char* mask, const unsigned int width, const unsigned int height, const int iteration) {
    /* One sub-iteration of the thinning algorithm by Guo and Hall (see their
       paper for the reasoning behind the conditions).

       Visits every nonzero pixel that is not on the image border, inspects
       its eight neighbours in binary_image and, when the removal conditions
       hold, writes 0 to the matching position in mask. binary_image itself
       is only read here. iteration == 0 selects the first variant of the
       directional condition, any other value the second.

       The mask is never reset: entries are only ever cleared.
       Returns the number of mask entries cleared by this call. */
    unsigned int removed = 0;

    for (unsigned int row = 1; row < height-1; row++) {
        const unsigned char* scanline = binary_image + row*width;
        const int row_end = width-1;
        unsigned int col = 0;

        while (col+1 < row_end) {
            /* Jump to the next nonzero pixel of this row; row_end means none left. */
            col = nonzero_clever(scanline, col+1, row_end);
            if (col == row_end) {
                break;
            }

            const unsigned int centre = col + row*width;
            assert(scanline[col] != 0);
            assert(binary_image[centre] != 0);

            /* Neighbours: p9 p8 p7 on the previous row, p2 / p6 on either
               side in the same row, p3 p4 p5 on the next row. */
            const bool p9 = binary_image[centre - width - 1];
            const bool p8 = binary_image[centre - width];
            const bool p7 = binary_image[centre - width + 1];
            const bool p2 = binary_image[centre - 1];
            const bool p6 = binary_image[centre + 1];
            const bool p3 = binary_image[centre + width - 1];
            const bool p4 = binary_image[centre + width];
            const bool p5 = binary_image[centre + width + 1];

            const unsigned int transitions =
                (!p2 && (p3 || p4)) +
                (!p4 && (p5 || p6)) +
                (!p6 && (p7 || p8)) +
                (!p8 && (p9 || p2));
            if (transitions != 1) {
                continue;
            }

            const unsigned int pairs_a = (p9 || p2) + (p3 || p4) + (p5 || p6) + (p7 || p8);
            const unsigned int pairs_b = (p2 || p3) + (p4 || p5) + (p6 || p7) + (p8 || p9);
            const unsigned int fewest = pairs_b < pairs_a ? pairs_b : pairs_a;

            const unsigned int directional = (iteration == 0)
                ? (p8 && (p6 || p7 || !p9))
                : (p4 && (p2 || p3 || !p5));

            if (fewest >= 2 && fewest <= 3 && directional == 0) {
                mask[centre] = 0;
                removed += 1;
            }
        }
    }
    return removed;
}
void andImage(unsigned char* image, const unsigned char* mask, const int size) {
    /* Calculate image &= mask over the first size bytes of both buffers.
       A size of zero or less is a no-op.

       To be faster on 64 bit cpus the bulk is processed one unsigned long
       long at a time. Words are moved with memcpy instead of dereferencing
       casted pointers, so neither buffer needs any particular alignment. */
    if (size <= 0) {
        return;
    }
    const size_t total = (size_t)size;
    const size_t step = sizeof(unsigned long long);
    size_t i = 0;

    for (; i + step <= total; i += step) {
        unsigned long long image_word;
        unsigned long long mask_word;
        memcpy(&image_word, image + i, step);
        memcpy(&mask_word, mask + i, step);
        image_word &= mask_word;
        memcpy(image + i, &image_word, step);
    }
    /* Remaining tail shorter than one word. */
    for (; i < total; ++i) {
        image[i] = image[i] & mask[i];
    }
}
int _guo_hall_thinning(unsigned char* binary_image, int width, int height) {
    /* Thin binary_image (width*height bytes, row after row) in place by
       alternating the two sub-iterations of guo_hall_iteration until a full
       pass clears nothing more.

       Returns 0 on success and -1 if the mask cannot be allocated; an image
       whose pixel count does not fit in an int is reported the same way. */

    /* guo_hall_iteration only visits rows 1..height-2 and stops short of the
       row ends, so smaller images have nothing to thin. Returning here also
       keeps its unsigned height-1 bound from wrapping around. */
    if (width < 3 || height < 3) {
        return 0;
    }
    /* width*height is passed around as an int; make sure it fits. */
    if (width > INT_MAX / height) {
        return -1;
    }
    const int size = width * height;

    unsigned char* mask = malloc((size_t)size);
    if (mask == NULL) {
        return -1;
    }
    /* Start with every mask bit set, i.e. "keep every pixel". */
    memset(mask, UCHAR_MAX, (size_t)size);

    int changed;
    do {
        changed = guo_hall_iteration(binary_image, mask, width, height, 0);
        andImage(binary_image, mask, size);
        changed += guo_hall_iteration(binary_image, mask, width, height, 1);
        andImage(binary_image, mask, size);
    } while (changed != 0);

    free(mask);
    return 0;
}
我已经开始阅读《将扩展模块移植到 Python 3》这篇文档,但我必须承认我几乎看不懂。
我尝试把 Py_InitModule 换成 Python 3 中对应的 PyModule_Create,并做了其他一些代码调整,但仍然不起作用。 不幸的是,这个 thinning 模块是我们应用程序的硬依赖。 所以我现在陷入了困境:既没有时间,也没有足够的知识把这个模块移植到 Python 3。
注意:我无法深入说明 guo_hall_thinning 这个函数本身具体做了什么。 我只知道它使用了 numpy C-API 的一小部分,以 ndarray 的形式获取和返回数据; 我没有找到任何说明这部分 API 发生过变化的文档,所以它应该可以直接使用。
现在, 确实发生变化的是模块初始化的方式 ; 这一点我可以帮你解决,让它能够在 Python 3 发行版中导入。 我这里用的也是 3.5,不过我认为 3.x 系列各个旧版本之间要么没有差异,要么是向后兼容的。
如您所述,在Porting to Python 3文档中提供了一般信息,其中包含有关模块初始化和状态中的初始化阶段的详细信息。 PEP 3121中描述了新的变化,它本身就是一个不错但具有挑战性的读物。
现在, 它的要点可以列在两点 :
A)模块现在在专用的PyModuleDef
结构中定义:
/* Layout of a module definition. The members are listed in the order in
   which a positional initializer has to supply them: header, name,
   docstring, per-module data size, method table, then the optional hooks. */
struct PyModuleDef{
PyModuleDef_Base m_base; /* To be filled out by the interpreter */
const char* m_name; /* Name of the module */
const char* m_doc; /* Docstring of the module */
Py_ssize_t m_size; /* Size of per-module data */
PyMethodDef *m_methods;
inquiry m_reload; /* NOTE(review): CPython 3.5+ headers declare this slot as m_slots - confirm against your Python.h */
traverseproc m_traverse;
inquiry m_clear;
freefunc m_free;
};
这个新结构还包含保存模块名称和文档字符串的成员。 成员 m_reload、m_traverse、m_clear 和 m_free 在初始化/终结期间提供额外的控制,不过我们可以把它们都保留为 NULL。 为了简单起见,这里还把模块的 m_size 设置为 -1;为这些成员设置实际的值通常是为了支持多个解释器/多次初始化,处理起来要棘手得多。
因此,简而言之, thinning
模块的奇特新模块结构可能如下所示:
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"thinning",
"Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning",
-1,
thinningMethods,
NULL,
NULL,
NULL,
NULL
};
第一点就是这些!
B)新的初始化函数,也就是说,你需要对 initthinning 做一次大改造。
新的模块初始化函数返回 PyObject *,并且现在命名为 PyInit_<module_name>。 在其中(init,懂了吗?)使用 PyModule_Create(&moduledef) 创建新模块,它接受我们定义的结构并返回初始化好的模块。 现在它更漂亮了,看起来像这样:
/* ==== Initialize the C_test functions ====================== */
PyObject *
PyInit_thinning(void){
// create module
PyObject *module = PyModule_Create(&moduledef);
// handle probable error
if (module == NULL)
return NULL;
PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann@gmx.de>");
PyModule_AddStringConstant(module, "__version__", "1.2.3");
import_array(); // Must be present for NumPy. Called first after above line.
// return newly created module
return module;
}
模块初始化要做的就是这些。 您可以下载该模块(我相信您已经下载了),找到 thinning_folder/src/c_thinning.c 文件,然后把下面这一行之前的所有内容:
/* ==== Guo Hall Thinning =========
替换为以下内容:
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "Python.h"
#include "arrayobject.h"
#include <stdlib.h>
#include <assert.h>
#include <stdbool.h>
#include <limits.h>
static PyObject *guo_hall_thinning(PyObject *self, PyObject *args);
int _guo_hall_thinning(unsigned char* binary_image, int width, int height);
/* ==== Set up the methods table ====================== */
static PyMethodDef thinningMethods[] = {
{"guo_hall_thinning",guo_hall_thinning, METH_VARARGS,
"Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall."
"Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format."
"\n\n"
"We assume that the dimensions of the image fit into an int on your platform. If your computer for some"
"reason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen."
"\n\n"
"interface:\n"
"\tguo_hall_thinning(segmented_image)"
"\tsegmented_image is a NumPy matrix,"
"\treturns the same NumPy matrix (thinned)"},
{NULL, NULL, 0, NULL} /* Sentinel - marks the end of this structure */
};
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"thinning",
"Thinning of segmented images. See https://bitbucket.org/adrian_n/thinning.",
-1,
thinningMethods,
NULL,
NULL,
NULL,
NULL
};
/* ==== Initialize the C_test functions ====================== */
PyObject *
PyInit_thinning(void){
PyObject *module = PyModule_Create(&moduledef);
if (module == NULL)
return NULL;
PyModule_AddStringConstant(module, "__author__", "Adrian Neumann <adrian_neumann@gmx.de>");
PyModule_AddStringConstant(module, "__version__", "1.2.3");
import_array(); // Must be present for NumPy. Called first after above line.
return module;
}
/* ==== Guo Hall Thinning =========
// Leave the rest as it was
之后,导航到包含setup.py
的顶级目录并运行:
python setup.py install
照常。 可能会弹出一些编译警告,但可以安全地忽略这些警告。 如果一切顺利,您将获得成功安装,以下不会导致令人讨厌的seg-fault :
>>> from thinning import guo_hall_thinning
>>> print(guo_hall_thinning.__doc__)
Takes a 2D numpy UBYTE array in C-order and thins it in place using the algorithm by Guo and Hall.Images that come out of cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) have the right format.
We assume that the dimensions of the image fit into an int on your platform. If your computer for somereason has a 2 byte int and lots of memory so that the image can become too large, bad things can happen.
interface:
guo_hall_thinning(segmented_image) segmented_image is a NumPy matrix, returns the same NumPy matrix (thinned)
我进一步修改了 c_thinning.c 的源代码,让它打印出每次迭代中被更改的元素数量。 它看起来确实在修改一些东西,但我不清楚它依据的具体准则,因为我没有阅读相应的论文。
总之, guo_hall_thinning(ndarr) 显然是就地(in place)进行“细化”的。 这意味着执行之后,作为参数传入的原始数组会被修改。 因此,下面这种形式的检查:
gray_img == guo_hall_thinning(gray_img)
永远为 True
( 提示:可以用 (arr1 == arr2).all() 检查两个 numpy 数组是否相等 ) 。
这是我进行的测试,您可以直观地看到发生的变化,我相信这个测试也可以在您的机器上重现:
# dtype = 'B' is UBYTE
>>> n = numpy.ndarray(shape=(100, 200), dtype='B')
>>> n
array([[ 40, 159, 95, ..., 114, 114, 97],
[121, 95, 108, ..., 114, 101, 32],
[ 48, 161, 90, ..., 127, 0, 0],
...,
[110, 32, 97, ..., 124, 1, 0],
[124, 5, 0, ..., 0, 0, 131],
[ 1, 0, 25, ..., 0, 125, 17]], dtype=uint8)
>>> thinning.guo_hall_thinning(n)
-- Array height 100 Array width: 200
Value of `changed` during 0 iteration is: 1695
Value of `changed` during 1 iteration is: 1216
Value of `changed` during 2 iteration is: 808
Value of `changed` during 3 iteration is: 493
Value of `changed` during 4 iteration is: 323
Value of `changed` during 5 iteration is: 229
Value of `changed` during 6 iteration is: 151
Value of `changed` during 7 iteration is: 90
Value of `changed` during 8 iteration is: 46
Value of `changed` during 9 iteration is: 27
Value of `changed` during 10 iteration is: 11
Value of `changed` during 11 iteration is: 8
Value of `changed` during 12 iteration is: 7
Value of `changed` during 13 iteration is: 4
Value of `changed` during 14 iteration is: 0
Value of `ok` is: 0
# array returned
array([[ 40, 159, 95, ..., 114, 114, 97],
[121, 0, 0, ..., 114, 0, 32],
[ 48, 0, 0, ..., 127, 0, 0],
...,
[110, 0, 97, ..., 124, 1, 0],
[124, 5, 0, ..., 0, 0, 131],
[ 1, 0, 25, ..., 0, 125, 17]], dtype=uint8)
所以我猜它确实有效:-)。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.