简体   繁体   English

多线程分段错误

[英]segmentation fault with multithreading

I am trying to calculate prime numbers with striking out multiples of numbers.我正在尝试通过剔除数字的倍数来计算素数。 Doing this enough and you are leftover with primes only.这样做足够了,你只剩下素数了。
example: https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes .例如: https : //en.wikipedia.org/wiki/Sieve_of_Eratosthenes

The problem is that this only works for primes till 10.000.问题是这仅适用于 10.000 之前的素数。 When trying to calculate more will result in a segmentation fault(core dumped).尝试计算更多时将导致分段错误(核心转储)。 There is however some trade off between the amount of threads and the number of primes that you try to calculate.然而,在线程数量和您尝试计算的素数数量之间存在一些权衡。 eg: NROF_SIEVE = 4000 and NROF_THREADS = 10. sometimes work.例如:NROF_SIEVE = 4000 和 NROF_THREADS = 10。有时会起作用。 But:NROF_SIEVE = 4000 and NROF_THREADS = 20. Does not work and results in segmentation error.但是:NROF_SIEVE = 4000 和 NROF_THREADS = 20。不起作用并导致分段错误。

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>     // for usleep()
#include <time.h>       // for time()
#include <pthread.h>
#include <errno.h>

#include "prime.h"

typedef unsigned long long  MY_TYPE;
static pthread_mutex_t      mutex          = PTHREAD_MUTEX_INITIALIZER;

// create a bitmask where bit at position n is set
#define BITMASK(n)          (((MY_TYPE) 1) << (n))

// check if bit n in v is set
#define BIT_IS_SET(v,n)     (((v) & BITMASK(n)) == BITMASK(n))

// set bit n in v
#define BIT_SET(v,n)        ((v) =  (v) |  BITMASK(n))

// clear bit n in v
#define BIT_CLEAR(v,n)      ((v) =  (v) & ~BITMASK(n))

static void rsleep (int t);
static void * my_thread (void * arg);
static unsigned long long get_next_parameter (unsigned long long current);

int main (void)
{
    // TODO: start threads generate all primes between 2 and NROF_SIEVE and output the results
    // (see thread_malloc_free_test() and thread_mutex_test() how to use threads and mutexes,
    //  see bit_test() how to manipulate bits in a large integer)

    unsigned long long i = 0;
    for (i = 0; i <= NROF_SIEVE/64; i++) {
        buffer[i] = ~0;
    }
    BIT_CLEAR(buffer[0], 0);
    BIT_CLEAR(buffer[0], 1);

    unsigned long long *       parameter;
    pthread_t   thread_id[NROF_THREADS];
    unsigned long long current = 2;

    // start the first NROF_THREADS
    for (i = 0; i < NROF_THREADS; i++) {
        parameter = malloc(sizeof(MY_TYPE));
        * parameter = current;
        pthread_create (&thread_id[i], NULL, my_thread, parameter);
        current++;
        rsleep(5);
    }

    /*
    while (1) {
        if (current >= NROF_SIEVE) {
            //exit the loop since we are done
            break;
        } else if (BIT_IS_SET(buffer[current/64], current % 64)){
            // eliminate these multiples
            * parameter = current;
            // start the thread
            pthread_create (&thread_id, NULL, my_thread, parameter);
            rsleep (3);
            // wait for the thread, and we are interested in the return value
            pthread_join (thread_id, NULL);
            current++;
        } else {
            // not yet a one found so increase
            current++;
        }
    }
    */ 

    unsigned long long o = 0;
    while (1) {
        pthread_join (thread_id[o], NULL);
        pthread_mutex_lock (&mutex);
        parameter = malloc(sizeof(MY_TYPE));
        * parameter = get_next_parameter (current);
        if (* parameter == 0) { //no more elements to process
            break;
        }
        current++;
        pthread_create (&thread_id[o], NULL, my_thread, parameter);
        pthread_mutex_unlock (&mutex);
        rsleep(5);
        o = (o + 1) % NROF_THREADS;
    }
    pthread_mutex_unlock (&mutex);

    // join with the last threads
    for (i = 0; i < NROF_THREADS; i++) {
        pthread_join (thread_id[i], NULL);
    }

    // print the prime numbers
    for (i = 2; i <= NROF_SIEVE; i++) {
        if (BIT_IS_SET(buffer[i/64], i % 64)) {
            fprintf(stdout, "%lld\n", i);
        }
    }
    return (0);
}

/*
* Thread method to strike out all the multiples of the arg
* parameter. Put the results in the buffer[]. Has to be with a `mutex protecting in from other threads enter this critical section`
*/
static void * my_thread (void * arg) {
    unsigned long long * argi;
    argi = (unsigned long long *) arg;   // cast it to a int pointer
    unsigned long long local_current = * argi;
    unsigned long long helper = local_current;
    free(arg);
    while (local_current <= NROF_SIEVE) {
        local_current = local_current + helper;
        pthread_mutex_lock (&mutex);
        BIT_CLEAR(buffer[local_current/64], local_current % 64);
        pthread_mutex_unlock (&mutex);
    }
    return NULL;
}

unsigned long long get_next_parameter (unsigned long long current) {
    while (current <= NROF_SIEVE) {
        if (BIT_IS_SET(buffer[current/64], current % 64)) {
            return current;
        } else {
            current++;
        }
    }
    return 0;
}

/*
 * rsleep(int t)
 *
 * The calling thread will be suspended for a random amount of time between 0 and t microseconds
 * At the first call, the random generator is seeded with the current time
 */
static void rsleep (int t)
{
    static bool first_call = true;

    if (first_call == true)
    {
        srandom (time (NULL) % getpid());
        first_call = false;
    }
    usleep (random () % t);
}

The header file:头文件:

/**
 * NROF_SIEVE: size of the sieve
 * (value must be between 1 and 15485864)
 */
#define NROF_SIEVE         40

/**
 * NROF_THREADS: number of threads that will run in parallel
 */
#define NROF_THREADS        50

/**
 * buffer[]: datastructure of the sieve; each number is represented by one bit
 */
static unsigned long long   buffer [(NROF_SIEVE/64) + 1];

Segmentation faults are often caused by invalid memory accesses.分段错误通常是由无效的内存访问引起的。 Recompile your program with and/ or on a sufficiently modern version of Clang or GCC.在足够现代的 Clang 或 GCC 版本上使用和/或编译您的程序。

It aborts pretty much here with a meaningful backtrace:它在这里几乎中止并带有一个有意义的回溯:

$ gcc -Wall -g prime.c -fsanitize={undefined,address} -pthread
$ ./a.out
prime.c:121:9: runtime error: index 1 out of bounds for type 'long long unsigned int [1]'
prime.c:121:9: runtime error: index 1 out of bounds for type 'long long unsigned int [1]'
prime.c:121:9: runtime error: load of address 0x0000006043e8 with insufficient space for an object of type 'long long unsigned int'
0x0000006043e8: note: pointer points here
 20 8a a2 ec  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00
              ^ 
=================================================================
==3351==ERROR: AddressSanitizer: global-buffer-overflow on address 0x0000006043e8 at pc 0x0000004018cb bp 0x7fc153ae0e30 sp 0x7fc153ae0e20
READ of size 8 at 0x0000006043e8 thread T31
    #0 0x4018ca in my_thread /tmp/so/prime.c:121
    #1 0x7fc1660064a3 in start_thread (/usr/lib/libpthread.so.0+0x74a3)
    #2 0x7fc165d4413c in clone (/usr/lib/libc.so.6+0xe913c)

0x0000006043e8 is located 0 bytes to the right of global variable 'buffer' defined in 'prime.h:15:29' (0x6043e0) of size 8
0x0000006043e8 is located 56 bytes to the left of global variable 'mutex' defined in 'prime.c:12:29' (0x604420) of size 40
SUMMARY: AddressSanitizer: global-buffer-overflow /tmp/so/prime.c:121 my_thread

Line 121 contains buffer[local_current/64] :第 121 行包含buffer[local_current/64]

static void * my_thread (void * arg) {
    unsigned long long * argi;
    argi = (unsigned long long *) arg;   // cast it to a int pointer
    unsigned long long local_current = * argi;
    unsigned long long helper = local_current;
    free(arg);
    while (local_current <= NROF_SIEVE) {
        local_current = local_current + helper;
        pthread_mutex_lock (&mutex);
        BIT_CLEAR(buffer[local_current/64], local_current % 64);
        pthread_mutex_unlock (&mutex);
    }
    return NULL;
}

This seems to suggest that your local_current becomes larger than NROF_SIEVE which is indeed the case if helper > 0 .这似乎表明您的local_current变得大于NROF_SIEVE如果helper > 0确实是这种情况。 Something is fishy...有什么鱼腥味...

With a debugger ( gdb ), you can set (conditional) breakpoints, inspect variables and more.使用调试器 ( gdb ),您可以设置(条件)断点、检查变量等。 An example:一个例子:

$ gdb -q ./a.out 
Reading symbols from ./a.out...done.
(gdb) break prime.c:121 if local_current >= 64
Breakpoint 1 at 0x4017e6: file prime.c, line 121.
(gdb) run
Starting program: /tmp/so/a.out 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib/libthread_db.so.1".
[Switching to Thread 0x7fffe21de700 (LWP 4118)]

Breakpoint 1, my_thread (arg=0x60200000ebd0) at prime.c:121
121             BIT_CLEAR(buffer[local_current/64], local_current % 64);
(gdb) print local_current
$1 = 70
(gdb) step
__ubsan::__ubsan_handle_out_of_bounds (Data=0x602ea0, Index=1) at /build/gcc-multilib/src/gcc-5.2.0/libsanitizer/ubsan/ubsan_handlers.cc:227
227     /build/gcc-multilib/src/gcc-5.2.0/libsanitizer/ubsan/ubsan_handlers.cc: No such file or directory.
(gdb) next
prime.c:121:9: runtime error: index 1 out of bounds for type 'long long unsigned int [1]'
228     in /build/gcc-multilib/src/gcc-5.2.0/libsanitizer/ubsan/ubsan_handlers.cc
(gdb) info threads 
  Id   Target Id         Frame 
  40   Thread 0x7fffdf9d9700 (LWP 4128) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  39   Thread 0x7fffe01da700 (LWP 4127) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  38   Thread 0x7fffe09db700 (LWP 4125) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  37   Thread 0x7fffe11dc700 (LWP 4124) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  36   Thread 0x7fffe19dd700 (LWP 4123) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
* 35   Thread 0x7fffe21de700 (LWP 4118) "a.out" __ubsan::__ubsan_handle_out_of_bounds (Data=0x602ea0, Index=1)
    at /build/gcc-multilib/src/gcc-5.2.0/libsanitizer/ubsan/ubsan_handlers.cc:227
  34   Thread 0x7fffe29df700 (LWP 4117) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  33   Thread 0x7fffe31e0700 (LWP 4116) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  32   Thread 0x7fffe39e1700 (LWP 4115) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  9    Thread 0x7fffef1f8700 (LWP 4092) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  7    Thread 0x7ffff01fa700 (LWP 4089) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  6    Thread 0x7ffff09fb700 (LWP 4088) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  4    Thread 0x7ffff19fd700 (LWP 4086) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
  1    Thread 0x7ffff7fac7c0 (LWP 4080) "a.out" 0x00007ffff5f4dcfc in __lll_lock_wait () from /usr/lib/libpthread.so.0
(gdb) continue
Continuing.
prime.c:121:9: runtime error: index 1 out of bounds for type 'long long unsigned int [1]'
prime.c:121:9: runtime error: load of address 0x0000006043e8 with insufficient space for an object of type 'long long unsigned int'
0x0000006043e8: note: pointer points here
 20 8a a2 a8  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  00 00 00 00
              ^ 
=================================================================
==4080==ERROR: AddressSanitizer: global-buffer-overflow on address 0x0000006043e8 at pc 0x0000004018cb bp 0x7fffe21dde30 sp 0x7fffe21dde20

Good luck with your homework!祝你的家庭作业好运!

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM