简体   繁体   中英

Passing string array from python to C and using in for loop (code included)

I am trying to pass two string arrays from Python into C, compare every pair of strings in a nested for loop, and, whenever a pair satisfies a threshold, append the match to a new string array in C. The function ends by returning this new array of matched strings. The function is called from Python using CDLL, and the returned string array is then used in my Python script.

#In Python:

# The two input lists whose entries are compared pairwise on the C side.
PyOne = [
    "Apple",
    "Orange",
    "Banana",
]
PyTwo = [
    "Cucumber",
    "Mango",
    "Pineapple",
    "Apple",
]

I have translated these for use in my C function as follows:

#In Python:

# Encode every string as UTF-8 bytes, because c_char_p holds bytes, not str.
PyOne = ["Apple", "Orange", "Banana"]
PyOne_bytes = [s.encode('utf-8') for s in PyOne]
# One extra slot: a new ctypes array is zero-initialised, so the last entry
# stays NULL and can serve as an end-of-list marker for C code.
One_array = (ctypes.c_char_p * (len(PyOne_bytes) + 1))()
One_array[:-1] = PyOne_bytes

PyTwo = ["Cucumber", "Mango", "Pineapple", "Apple"]
PyTwo_bytes = [s.encode('utf-8') for s in PyTwo]
# Same layout as One_array: the encoded strings followed by a NULL entry.
Two_array = (ctypes.c_char_p * (len(PyTwo_bytes) + 1))()
Two_array[:-1] = PyTwo_bytes

The above code translates the existing Python string array into one that is interpretable by C.

This is my C function:

// In C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SCALING_FACTOR 0.1
#include <ctype.h>

...

// Intended to collect every PyOne entry whose comparison() score against a
// PyTwo entry is greater than 4, each followed by ';', and return the result.
char ** mainForLoop(const char ** PyOne, const char ** PyTwo) {
    // Uninitialised storage sized for 100 pointers; the strcat calls below
    // use it as if it were a character buffer.
    char ** matches = malloc(100 * sizeof(char*));
    size_t i = 0;
    size_t j = 0;
    // NOTE(review): PyOne is a pointer parameter, so sizeof(PyOne)/sizeof(PyOne[0])
    // divides one pointer size by another and evaluates to 1. It is not the
    // number of strings, so this loop body runs once.
    for (i = 0; i < sizeof(PyOne)/sizeof(PyOne[0]); i++) {
        // NOTE(review): same issue for PyTwo; this bound is also 1.
        for (j = 0; j < sizeof(PyTwo)/sizeof(PyTwo[0]); j++) {
            // NOTE(review): PyTwo is indexed with i here; the inner index j is unused.
            double v = comparison(PyOne[i], PyTwo[i]);
            if (v > 4) {
                // NOTE(review): strcat needs a NUL-terminated char* destination, but
                // matches is a char** whose memory was never initialised.
                strcat(matches, (PyOne[i]));
                strcat(matches, (";"));
            }
        }
    }
    return matches;
}

In python, I then print the returned value from the function as follows:

# Both parameters are passed as pointers to arrays of C strings.
c.mainForLoop.argtypes = [POINTER(c_char_p), POINTER(c_char_p)]
# NOTE(review): the C function shown above is declared as returning char **,
# while c_char_p makes ctypes read the result as one NUL-terminated byte string.
c.mainForLoop.restype = ctypes.c_char_p

print(c.mainForLoop(One_array, Two_array))

If, for example, comparison("Apple", "Cucumber") = 5 (i.e. > 4), comparison("Orange", "Mango") = 7 (i.e. > 4), and every other comparison() result is below 4, then I would expect, due to the following...

// In C
double v = comparison(PyOne[i], PyTwo[i]);
                if (v > 4) {
                    strcat(matches, (PyOne[i]));
                    strcat(matches, (";"));
                }

the call below to print:

#In Python
print(cDoc.mainForLoop(One_array, Two_array))

>>> b'Apple;Orange'

but currently, this prints:

>>> b'Apple;'

I don't know what I'm doing wrong in my code. I'm somewhat new to C, and I've tried everything I can think of. Any help would be appreciated, and an explanation would also be ace!

Thank you!

EDIT: Following on from the answers below, this is my new code:

// In C
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>


// Releases every non-NULL string in list[0..size-1], then the pointer array itself.
void free_list(char** list, size_t size) {
    size_t idx = 0;
    while (idx < size) {
        char* entry = list[idx++];
        if (entry != NULL) {
            free(entry);
        }
    }
    free(list);
}

// Builds one result string per PyOne entry.
//
// For every i, PyOne[i] is scored against each PyTwo[j] with similarity()
// (declared elsewhere; the surrounding text calls it comparison()). Each score
// greater than 4 contributes one "PyOne[i];" to matches[i], so an entry that
// matches twice yields "Apple;Apple;". Entries with no match are set to NULL.
//
// Returns a heap array of sizeOne pointers, or NULL if an allocation fails.
// The caller owns the result and must release it with
// free_list(matches, sizeOne) once it has finished reading it.
char ** mainForLoop(const char ** PyOne, const char ** PyTwo, size_t sizeOne, size_t sizeTwo) {
    // Allocate at least one slot so an empty input still yields a non-NULL array
    char ** matches = malloc((sizeOne ? sizeOne : 1) * sizeof(char *));
    if (matches == NULL) {
        return NULL;
    }
    // Unmatched slots must be NULL: free_list() skips NULL entries but would
    // otherwise call free() on whatever garbage malloc left behind.
    for (size_t i = 0; i < sizeOne; i++) {
        matches[i] = NULL;
    }

    for (size_t i = 0; i < sizeOne; i++) {
        // First count the matches so the string can be allocated once, at its
        // exact size (no fixed-size scratch buffer to overflow, no leaked
        // intermediate copies).
        size_t hits = 0;
        for (size_t j = 0; j < sizeTwo; j++) {
            double v = similarity(PyOne[i], PyTwo[j]);
            if (v > 4) {
                hits++;
            }
        }
        if (hits == 0) {
            continue;
        }

        size_t wordLen = strlen(PyOne[i]);
        // Each hit writes the word plus ';'; +1 for null termination
        char * str = malloc(hits * (wordLen + 1) + 1);
        if (str == NULL) {
            // Out of memory: release what was built so far and report failure
            for (size_t k = 0; k < i; k++) {
                free(matches[k]);
            }
            free(matches);
            return NULL;
        }

        char * out = str;
        for (size_t h = 0; h < hits; h++) {
            memcpy(out, PyOne[i], wordLen);
            out += wordLen;
            *out++ = ';';
        }
        *out = '\0';

        matches[i] = str;
    }
    return matches;
}



#In Python
# Load the shared library and declare the C prototypes for ctypes.
dll = CDLL("c.file")
# mainForLoop(char **, char **, size_t, size_t) -> char **
dll.mainForLoop.argtypes = POINTER(c_char_p),POINTER(c_char_p),c_size_t,c_size_t
dll.mainForLoop.restype = POINTER(c_char_p)
# free_list(char **, size_t) -> void
dll.free_list.argtypes = POINTER(c_char_p),c_size_t
dll.free_list.restype = None

def make_clist(lst):
    """Return a ctypes array of byte strings built from a list of str."""
    return (c_char_p * len(lst))(*[x.encode() for x in lst])

def mainForLoop(list1,list2):
    """Call the C mainForLoop on two lists of str and decode what it returns."""
    # NOTE(review): size is created here (a fresh c_size_t holds 0) but is never
    # passed to the C function or assigned afterwards, so size.value stays 0.
    size = c_size_t()
    result = dll.mainForLoop(make_clist(list1),make_clist(list2),len(list1),len(list2))
    # With size.value == 0 this slice is empty, so data is always [].
    data = [x.decode() for x in result[:size.value]]
    # Likewise free_list receives a size of 0 here.
    dll.free_list(result,size.value)
    return data

list1 = ["Apple", "Orange", "Banana"]
list2 = ["Apple", "Mango", "Pineapple", "Apple"]
print(mainForLoop(list1,list2))

However, this returns:

[]

Please note that the "comparison()" function in C is a string distance calculation that returns a double value by comparing two strings.

Any help would be hugely appreciated.

Here is an example that is closer to your configuration. Note that my main function is only there to run the example; in your case it is not needed.

The main fix is that I allocate memory for each new string, which you didn't do (you allocated memory for the pointers but not for the strings themselves). To do that, I first build the string in a 100-character temporary buffer on the stack (you can make it longer if needed) and, once the length of the string is known, I allocate a heap copy of exactly that size. Note that the function you prepared for freeing these strings and pointers (free_list) is already correct:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Demo stand-in for a real string comparison: ignores both arguments and
// returns a pseudo-random whole number between 0 and 11, taken from rand().
double comparison(const char * one, const char * two) {
    return (double) (rand() % 12);
}

// Builds one result string per PyOne entry.
//
// For every i, PyOne[i] is scored against each PyTwo[j] with comparison().
// Each score greater than 4 contributes one "PyOne[i];" to matches[i], so an
// entry that matches twice yields "Banana;Banana;". Entries with no match are
// set to NULL.
//
// sizeOne / sizeTwo are the element counts of PyOne / PyTwo; negative values
// are treated as 0.
//
// Returns a heap array of sizeOne pointers, or NULL if an allocation fails.
// The caller owns the result and releases it with free_list(matches, sizeOne).
char ** mainForLoop(char ** PyOne, char ** PyTwo, int sizeOne, int sizeTwo) {
    // Convert once, so the size_t loop indices are never compared against a
    // negative int (which would wrap to a huge unsigned bound)
    size_t countOne = sizeOne > 0 ? (size_t) sizeOne : 0;
    size_t countTwo = sizeTwo > 0 ? (size_t) sizeTwo : 0;

    // Allocate an array of N string pointers where N is the size of PyOne
    // (at least one slot, so an empty input still yields a non-NULL array)
    char ** matches = malloc((countOne ? countOne : 1) * sizeof(char *));
    if (matches == NULL) {
        return NULL;
    }
    // Unmatched slots must be NULL: free_list() skips NULL entries but would
    // otherwise call free() on whatever garbage malloc left behind.
    for (size_t i = 0; i < countOne; i++) {
        matches[i] = NULL;
    }

    for (size_t i = 0; i < countOne; i++) {
        // First count the matches so the string can be allocated once, at its
        // exact size (no fixed-size scratch buffer to overflow, no leaked
        // intermediate copies).
        size_t hits = 0;
        for (size_t j = 0; j < countTwo; j++) {
            double v = comparison(PyOne[i], PyTwo[j]);
            if (v > 4) {
                hits++;
            }
        }
        if (hits == 0) {
            continue;
        }

        size_t wordLen = strlen(PyOne[i]);
        // Each hit writes the word plus ';'; +1 for null termination
        char * str = malloc(hits * (wordLen + 1) + 1);
        if (str == NULL) {
            // Out of memory: release what was built so far and report failure
            for (size_t k = 0; k < i; k++) {
                free(matches[k]);
            }
            free(matches);
            return NULL;
        }

        char * out = str;
        for (size_t h = 0; h < hits; h++) {
            memcpy(out, PyOne[i], wordLen);
            out += wordLen;
            *out++ = ';';
        }
        *out = '\0';

        // And collects it
        matches[i] = str;
    }
    return matches;
}


// Frees each non-NULL string held in the first `size` slots of `list`,
// then frees the pointer array itself.
void free_list(char** list, size_t size) {
    char** cursor = list;
    size_t remaining = size;
    while (remaining-- > 0) {
        if (*cursor != NULL) {
            free(*cursor);
        }
        cursor++;
    }
    free(list);
}


// Demo driver: runs mainForLoop on two fixed fruit lists, prints the result
// strings and releases them. Returns 0 on success, EXIT_FAILURE if
// mainForLoop returns NULL.
int main() {
    // Initializes random (the demo comparison() draws its scores from rand())
    srand((unsigned) time(NULL));

    char * PyOne[] = {"Apple", "Orange", "Banana"};
    char * PyTwo[] = {"Cucumber", "Mango", "Pineapple", "Apple"};
    // Derive the counts from the arrays so they cannot drift out of sync
    int sizeOne = (int) (sizeof(PyOne) / sizeof(PyOne[0]));
    int sizeTwo = (int) (sizeof(PyTwo) / sizeof(PyTwo[0]));

    char ** matches = mainForLoop(PyOne, PyTwo, sizeOne, sizeTwo);
    if (matches == NULL) {
        fprintf(stderr, "mainForLoop failed to allocate its result\n");
        return EXIT_FAILURE;
    }

    // Prints the result which is possibly (depending on random) :
    // "Apple; Orange; Banana;Banana;"
    for (int i = 0; i < sizeOne; i++) {
        // Skip empty slots: passing NULL to %s is undefined, and free_list
        // likewise treats NULL entries as "nothing stored here"
        if (matches[i] != NULL) {
            printf("%s ", matches[i]);
        }
    }
    printf("\n");

    // Frees
    free_list(matches, (size_t) sizeOne);
    return 0;
}

The C code provided is incorrect, so here is a demo showing how to pass and return arrays of byte strings. This example just appends the two lists together. It also frees the memory allocated in C, so there is no memory leak.

test.c:

#ifdef _WIN32
#   define API __declspec(dllexport)
#else
#   define API
#endif

#include <stdlib.h>
#include <string.h>

/* Returns a malloc'd copy of src, or NULL if src is NULL or memory runs out.
 * Used instead of _strdup(), which exists only in Microsoft's C runtime, so
 * the library also builds on the non-Windows targets the API macro allows. */
static char* copy_string(const char* src) {
    if (src == NULL)
        return NULL;
    size_t len = strlen(src) + 1;   /* +1 for the terminating NUL */
    char* dst = malloc(len);
    if (dst != NULL)
        memcpy(dst, src, len);
    return dst;
}

/* Concatenates two string lists into one newly allocated list.
 *
 * list1/size1, list2/size2: the input arrays and their element counts.
 * pSize: receives the number of entries in the result (size1 + size2),
 *        or 0 when the function fails.
 *
 * Returns a heap array of heap-allocated string copies, or NULL if an
 * allocation fails. The caller releases the result with free_list().
 */
API char** append_lists(const char** list1, size_t size1, const char** list2, size_t size2, size_t* pSize) {
    size_t count = size1 + size2;
    /* At least one slot, so two empty lists still yield a non-NULL result. */
    char** total = malloc((count ? count : 1) * sizeof(char*));
    if (total == NULL) {
        *pSize = 0;
        return NULL;
    }
    for(size_t i = 0; i < count; ++i) {
        const char* src = i < size1 ? list1[i] : list2[i - size1];
        total[i] = copy_string(src);
        if (total[i] == NULL && src != NULL) {
            /* Out of memory: undo the copies made so far and report failure. */
            while (i > 0)
                free(total[--i]);
            free(total);
            *pSize = 0;
            return NULL;
        }
    }
    *pSize = count;
    return total;
}

/* Frees the `size` strings stored in `list`, then the array that holds them. */
API void free_list(char** list, size_t size) {
    size_t i = 0;
    while (i < size) {
        free(list[i]);
        ++i;
    }
    free(list);
}

test.py:

from ctypes import *

# Load the shared library built from test.c and declare the C prototypes,
# so ctypes converts arguments and return values with the right types.
dll = CDLL('./test')

# char** append_lists(const char**, size_t, const char**, size_t, size_t*)
dll.append_lists.argtypes = (
    POINTER(c_char_p),
    c_size_t,
    POINTER(c_char_p),
    c_size_t,
    POINTER(c_size_t),
)
dll.append_lists.restype = POINTER(c_char_p)

# void free_list(char**, size_t)
dll.free_list.argtypes = (POINTER(c_char_p), c_size_t)
dll.free_list.restype = None

def make_clist(lst):
    """Turn a Python list of Unicode strings into a ctypes array of byte strings."""
    array_type = c_char_p * len(lst)
    encoded = (text.encode() for text in lst)
    return array_type(*encoded)

def append_lists(list1,list2):
    """Concatenate two lists of Unicode strings using the C library.

    Converts both lists to ctypes arrays, calls the C ``append_lists``,
    decodes the returned byte strings back into ``str`` and frees the
    C allocations.

    Returns:
        A new list containing the items of ``list1`` followed by ``list2``.
    """
    size = c_size_t()  # filled in by the C function through byref()
    result = dll.append_lists(make_clist(list1),len(list1),make_clist(list2),len(list2),byref(size))
    try:
        return [x.decode() for x in result[:size.value]]
    finally:
        # Runs even if decoding raises, so the C allocations are never leaked.
        dll.free_list(result,size.value)

# Demo run: send both lists through the C library and print the combined list.
list1 = [
    "Apple",
    "Orange",
    "Banana",
]
list2 = [
    "Cucumber",
    "Mango",
    "Pineapple",
    "Apple",
]
print(append_lists(list1, list2))

Output:

['Apple', 'Orange', 'Banana', 'Cucumber', 'Mango', 'Pineapple', 'Apple']

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM