简体   繁体   中英

Cython No Performance Increase with prange/parallel

I'm using Cython version 0.27.3 to compile the following source for a simple primality testing module that contains both python and cython implementations of the same algorithm. When I set the threads parameter to different values, I see no performance increase, despite the GIL being released. Is there something that's preventing this from running in parallel?

The function in question is the cdef void _getprimes which accepts a memoryview slice as a parameter and should set all non-prime values to 0 in that slice.

primes.pyx

#cython: boundscheck=False, wraparound=False, nonecheck=False
cimport cython
from cpython cimport array
from cython.parallel cimport parallel, prange
from libc.math cimport sqrt, ceil
from libc.stdlib cimport malloc, free
from libc.stdio cimport printf
import math

# =====================
# Python implementation
# =====================

def pyisprime(n):
    """Python implementation"""
    if n < 2 or n & 1 == 0:
        if n == 2:
            return True
        return False
    for i in range(2, int(math.sqrt(n)) + 1):
        if n % i == 0:
            return False
    return True

def pygetprimes(nums):
    return [num for num in nums if pyisprime(num)]


# =====================
# Cython implementation
# =====================
cdef int _isprime(unsigned long long n) nogil:
    """Cython implementation of a simple primality check"""
    cdef unsigned long long upper 
    cdef unsigned long long i = 3
    cdef int prime = 1
    if n < 2 or n & 1 == 0:
        if n == 2:
            return 1
        return 0
    upper = <unsigned long long>ceil(sqrt(<double>n))
    while i <= upper:
        if n % i == 0:
            prime = 0
            break
        i += 1
    return prime

def isprime(unsigned long long n):
    """Wrapper for _isprime"""
    cdef int result
    with nogil:
        result = _isprime(n)
    return result

cdef void _getprimes(unsigned long long[:] nums, int threads) nogil:
    cdef unsigned long num
    cdef int i = 0
    with parallel(num_threads=threads):
        for i in prange(nums.shape[0], schedule="dynamic"):
            if _isprime(nums[i]) == 0:
                nums[i] = 0

def getprimes(nums, int threads = 1):
    """Wrapper for _getprimes"""
    cdef unsigned long long num
    cdef unsigned long long[:] primes = array.array("Q", nums)

    with nogil:
        _getprimes(primes, threads)

    return [num for num in primes if num != 0]

setup.py

#!/usr/bin/env python3
from distutils.core import setup
from Cython.Build import cythonize

setup(
    name="primes",
    ext_modules=cythonize('primes.pyx'),
)

test.py

#!/usr/bin/env python3
import functools
import random
import time
import primes

def timed(func):
    def wrapped(*args, **kwargs):
        start = time.time()
        val = func(*args, **kwargs)
        end = time.time()
        print(func.__name__, end - start)
        return val
    return functools.wraps(func)(wrapped)


def main():
    nums = [random.randint(0, 0xffffff) for _ in range(500000)]

    pyfoo = timed(primes.pygetprimes)
    cyfoo = timed(primes.getprimes)

    x = pyfoo(nums)
    y = cyfoo(nums, 1)
    z = cyfoo(nums, 4)
    assert x == y == z

if __name__ == "__main__":
    main()

When I run cyfoo , I expected that increasing the number of threads from 1 to 4 would show some type of speed increase, but this is not the case:

[aarcher@Arch]: ~/Programming/Cython/build/lib.linux-x86_64-3.6>$ ./test.py 
pygetprimes 5.11554741859436
getprimes 1.1129701137542725
getprimes 1.1306445598602295

It seems you need to enable compiler flags for OpenMP for the parallel statements to actually do anything.

See cython docs here http://cython.readthedocs.io/en/latest/src/userguide/parallelism.html#compiling

# setup.py
# ... omitted ...

ext_modules = [
    Extension(
        "hello",
        ["hello.pyx"],
        extra_compile_args=['-fopenmp'],
        extra_link_args=['-fopenmp'],
    )
]

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM