简体   繁体   中英

Why does my program not wait when I call sem_wait?

Essentially, my program creates 3 threads: the "server" and 2 "workers." The workers are meant to sum the 3-digit positive integers in a 1000-line file (500 numbers per thread). After each worker has summed its part, the server prints each worker's total. The only problem is that my semaphores do not seem to work.

Here is a version of my program:

// simple c program to simulate POSIX thread and semaphore
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <semaphore.h>
// semaphore posted by each worker and waited on by the server (set up in main)
sem_t s1;
// single stream pointer at file scope; both worker routines assign to it
FILE *file;
// sum1/sum2: running totals of worker 1/2; num1/num2: their fscanf targets
int sum1 = 0, sum2 = 0, num1 = 0, num2 = 0;
// name of the input file both workers open
char fileName[10] = "data1.dat";
// server routine
// Thread entry point for the server: prints its status lines, waits on s1
// twice (once per worker post), then prints sum1 and sum2.
void* server_routine()
{
    // these two lines only print; nothing is posted to the workers here
    printf("Server sent signal to worker thread 1\n");
    printf("Server sent signal to worker thread 2\n");
    // s1 is shared by both workers, so either worker's sem_post can satisfy
    // this wait; the message below names worker 1 regardless
    sem_wait(&s1);
    printf("Server recieved completion signal from worker thread 1\n");
    // second wait: returns once the other post has arrived as well
    sem_wait(&s1);
    printf("Server recieved completion signal from worker thread 2\n\n");
    // print the final results
    printf("The sum of the first 500 numbers in the file is: %d\n", sum1);
    printf("The sum of the last 500 numbers in the file is: %d\n\n", sum2);
    pthread_exit(NULL);
}
// thread 1 routine
// Worker 1 thread entry point: opens fileName, performs 500 fscanf reads
// from the start of the file, accumulates them into sum1, prints the total
// and posts s1.
void* t1_routine()
{
    printf("Thread 1 recieved signal from server\n");
    // the result is stored in the file-scope `file`, which t2_routine also
    // assigns; the fopen result is not checked
    file = fopen(fileName, "r");
    for(int i = 0; i < 500; i++)
    {
        // fscanf's return value is not checked, so a failed read re-adds
        // whatever num1 already holds
        fscanf(file, "%d", &num1);
        sum1 += num1;
    }
    printf("sum in thread 1: %d\n", sum1);
    printf("Thread 1 sends completion signal to server\n");
    // lets one of the server's sem_wait calls return
    sem_post(&s1);
    pthread_exit(NULL);
}
// thread 2 routine
// Worker 2 thread entry point: opens fileName, seeks 500 * 5 bytes in,
// performs 500 fscanf reads, accumulates them into sum2, prints the total
// and posts s1.
void* t2_routine()
{
    printf("Thread 2 recieved signal from server\n");
    // the result is stored in the file-scope `file`, which t1_routine also
    // assigns; the fopen result is not checked
    file = fopen(fileName, "r");
    // NOTE(review): the offset presumes each of the first 500 lines occupies
    // exactly 5 bytes -- confirm against the data file's line format
    fseek(file, 500 * 5, SEEK_SET);
    for(int i = 0; i < 500; i++)
    {
        // fscanf's return value is not checked, so a failed read re-adds
        // whatever num2 already holds
        fscanf(file, "%d", &num2);
        sum2 += num2;
    }
    printf("sum in thread 2: %d\n", sum2);
    printf("Thread 2 sends completion signal to server\n");
    // lets one of the server's sem_wait calls return
    sem_post(&s1);
    pthread_exit(NULL);
}
// main function
// Initializes s1 to 0, starts the server and the two worker threads, joins
// all three, then tears down.
// Returns 1-3 when the matching pthread_create fails and 4-6 when the
// matching pthread_join fails.
int main(int argc, char *argv[])
{
    // define threads
    pthread_t server, t1, t2;
    // initialize the semaphore
    // (pshared 0, initial value 0: the server blocks until a worker posts)
    sem_init(&s1, 0, 0);
    
    if(pthread_create(&server, NULL, &server_routine, NULL) != 0)
    {
        return 1;
    }

    if(pthread_create(&t1, NULL, &t1_routine, NULL) != 0)
    {
        return 2;
    }
    if(pthread_create(&t2, NULL, &t2_routine, NULL) != 0)
    {
        return 3;
    }

    if(pthread_join(server, NULL) != 0)
    {
        return 4;
    }

    if(pthread_join(t1, NULL) != 0)
    {
        return 5;
    }
    if(pthread_join(t2, NULL) != 0)
    {
        return 6;
    }
    // destroy semaphores
    // NOTE(review): s1 was set up with sem_init; POSIX pairs sem_init with
    // sem_destroy and documents sem_close for sem_open semaphores -- confirm
    sem_close(&s1);
    // exit thread
    // pthread_exit does not return, so the return below is never reached
    pthread_exit(NULL);
    // end
    return 0;
}

I've tested with fewer threads and more semaphores as well, with no luck. I've tried different initial semaphore values. The only time I can get the correct output is when I manually wait with sleep(5); but that defeats the purpose of this project.

A few issues...

  1. Each client thread does its own/private fopen but FILE *file; is global so they overwrite each others values.
  2. We need to make this variable function scoped so each thread has its own private pointer.
  3. There are no fclose calls.
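  4. pthread_exit is not needed in the main thread here. Every thread has already been joined, and because pthread_exit never returns, the return 0 that follows it is unreachable; a plain return from main is enough.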

Otherwise...

  1. Whichever thread does the fopen last will set the final value.
  2. So, there is a race condition and the effect is the same as if the main thread (prior to pthread_create calls) had done a single fopen , defeating the purpose of each thread doing its own fopen .
  3. Worse yet, a given thread may do the first fopen , then start with fscanf and have its file value changed when the second thread replaces the file value, so weird stuff happens to each thread because they are doing fseek/fscanf on the same FILE * instance.
  4. Having the aforementioned fclose calls would have made the issue more evident.

Here is the refactored code. It is annotated:

// simple c program to simulate POSIX thread and semaphore
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <semaphore.h>

// semaphore posted by each worker and waited on by the server (set up in main)
sem_t s1;
// NOTE/BUG: each thread opens a different stream, so this must be function
// scoped
// (the file-scope pointer is compiled out; each worker declares its own)
#if 0
FILE *file;
#endif
// sum1/sum2: running totals of worker 1/2; num1/num2: their fscanf targets
int sum1 = 0,
    sum2 = 0,
    num1 = 0,
    num2 = 0;

// name of the input file both workers open
char fileName[10] = "data1.dat";

// server routine
// Thread entry point for the server: blocks until both workers have posted
// s1, then prints the two sums.
//   arg -- unused; present because pthread_create requires this signature
//   returns NULL
// NOTE/FIX: a start routine must be declared as void *(*)(void *); the old
// empty parameter list is not compatible with what pthread_create calls
void *
server_routine(void *arg)
{
    (void) arg;

    // these two lines only print; the workers are not actually signalled
    printf("Server sent signal to worker thread 1\n");
    printf("Server sent signal to worker thread 2\n");

    // one sem_wait per worker post; either worker may satisfy either wait
    sem_wait(&s1);
    printf("Server recieved completion signal from worker thread 1\n");
    sem_wait(&s1);
    printf("Server recieved completion signal from worker thread 2\n\n");

    // print the final results (both posts have been seen, so both sums are
    // complete)
    printf("The sum of the first 500 numbers in the file is: %d\n", sum1);
    printf("The sum of the last 500 numbers in the file is: %d\n\n", sum2);

    // returning from a start routine ends the thread just like pthread_exit
    return NULL;
}

// thread 1 routine
// Worker 1 entry point: sums up to 500 integers from the start of fileName
// into sum1, prints the total and posts s1 so the server can proceed.
//   arg -- unused; present because pthread_create requires this signature
//   returns NULL
// NOTE/FIX: a start routine must be declared as void *(*)(void *)
void *
t1_routine(void *arg)
{
// NOTE/FIX: this must be function scoped (i.e. private to this thread)
#if 1
    FILE *file;
#endif
    int value;

    (void) arg;

    printf("Thread 1 recieved signal from server\n");
    file = fopen(fileName, "r");
// NOTE/FIX: a failed fopen returns NULL, which must never reach fscanf
    if (file == NULL) {
        printf("Thread 1 could not open %s\n", fileName);
        // still post, otherwise the server's sem_wait would block forever
        sem_post(&s1);
        return NULL;
    }
    for (int i = 0; i < 500; i++) {
// NOTE/FIX: stop on a short or malformed file instead of re-adding stale data
        if (fscanf(file, "%d", &value) != 1) {
            printf("Thread 1 fscanf error at item %d\n", i);
            break;
        }
        sum1 += value;
    }
    printf("sum in thread 1: %d\n", sum1);
    printf("Thread 1 sends completion signal to server\n");
    sem_post(&s1);
#if 1
    fclose(file);
#endif
    return NULL;
}

// thread 2 routine
// Worker 2 entry point: skips the first 500 lines of fileName, sums up to 500
// integers from there into sum2, prints the total and posts s1.
//   arg -- unused; present because pthread_create requires this signature
//   returns NULL
// NOTE/FIX: a start routine must be declared as void *(*)(void *)
void *
t2_routine(void *arg)
{
// NOTE/FIX: this must be function scoped (i.e. private to this thread)
#if 1
    FILE *file;
#endif
    int value;

    (void) arg;

    printf("Thread 2 recieved signal from server\n");
    file = fopen(fileName, "r");
// NOTE/FIX: a failed fopen returns NULL, which must never reach fseek/fscanf
    if (file == NULL) {
        printf("Thread 2 could not open %s\n", fileName);
        // still post, otherwise the server's sem_wait would block forever
        sem_post(&s1);
        return NULL;
    }
    // NOTE(review): the offset presumes each of the first 500 lines occupies
    // exactly 5 bytes -- confirm against the data file's line format
    if (fseek(file, 500 * 5, SEEK_SET) != 0) {
        printf("Thread 2 fseek error\n");
    }
    else {
        for (int i = 0; i < 500; i++) {
// NOTE/FIX: stop on a short or malformed file instead of re-adding stale data
            if (fscanf(file, "%d", &value) != 1) {
                printf("Thread 2 fscanf error at item %d\n", i);
                break;
            }
            sum2 += value;
        }
    }
    printf("sum in thread 2: %d\n", sum2);
    printf("Thread 2 sends completion signal to server\n");
    sem_post(&s1);
#if 1
    fclose(file);
#endif
    return NULL;
}

// main function
// Initializes s1, starts the server and both worker threads, joins all three
// and then destroys the semaphore.
//   argc, argv -- unused
//   returns 0 on success, 1-3 when the matching pthread_create fails, 4-6 when
//   the matching pthread_join fails, 7 when sem_init fails
int
main(int argc, char *argv[])
{
    // define threads
    pthread_t server, t1, t2;

    (void) argc;
    (void) argv;

    // initialize the semaphore (pshared 0, initial value 0)
// NOTE/FIX: sem_init can fail; do not start threads on an unusable semaphore
    if (sem_init(&s1, 0, 0) != 0) {
        return 7;
    }

    if (pthread_create(&server, NULL, &server_routine, NULL) != 0) {
        return 1;
    }

    if (pthread_create(&t1, NULL, &t1_routine, NULL) != 0) {
        return 2;
    }
    if (pthread_create(&t2, NULL, &t2_routine, NULL) != 0) {
        return 3;
    }

    if (pthread_join(server, NULL) != 0) {
        return 4;
    }

    if (pthread_join(t1, NULL) != 0) {
        return 5;
    }
    if (pthread_join(t2, NULL) != 0) {
        return 6;
    }

    // destroy semaphores
// NOTE/FIX: sem_close is for named semaphores obtained from sem_open; one
// that was set up with sem_init is released with sem_destroy
#if 0
    sem_close(&s1);
#else
    sem_destroy(&s1);
#endif

    // exit thread
// NOTE/BUG: not needed here -- every thread has been joined, and it would
// make the return below unreachable
#if 0
    pthread_exit(NULL);
#endif

    // end
    return 0;
}

In the code above, I've used cpp conditionals to denote old vs. new code:

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

Note: this can be cleaned up by running the file through unifdef -k


UPDATE:

Thank you for the response Craig. I have implemented your suggestions to my own code but nothing seemed to change. I then decided to copy paste your updated code into a c file to test it out and I got the same result. It is as follows (in a separate comment since the output is too long): – Max

It's hard to compare results because we're using different datasets. I created a perl script to create some data.

Most important is that the sum reported by the given worker matches what the server sees for that worker task.

Then, if we know what each per thread section of the file should sum to, that is another matter.

The per line termination is critical (eg): CRLF vs LF (see below)

The actual order of worker sem_post and termination doesn't really matter. It can vary system to system or, even, invocation to invocation. What matters is that the server thread waits for N threads (ie) N sem_wait calls before printing any sums.

I've produced an updated version below.

  1. Server does not "signal" a worker. The worker "signals" the server by doing sem_post and the server "receives" it by doing sem_wait
  2. I've created a task/thread struct to hold the sums, thread IDs, etc.
  3. I've generalized the code to allow N threads.
  4. Added check of \n placement (ie line width). That is, under linux/POSIX a four digit number would be followed by LF (newline) and length would be 5. But, under windows, it would be CRLF (carriage return/newline) and length would be 6.
  5. Added check of file size to ensure it is exactly the desired/expected length.
  6. Some additional diagnostics.

Here is the updated code:

// simple c program to simulate POSIX thread and semaphore
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <semaphore.h>
#include <sys/stat.h>

// number of bytes per line
// 5: 4 digits + LF
// 6: 4 digits + CRLF
// (the #ifndef guard lets the build supply a different value)
#ifndef LINEWID
#define LINEWID     (4 + 1)
#endif

// number of items / task
// (also overridable at build time; each worker reads this many integers)
#ifndef COUNT
#define COUNT       500
#endif

// semaphore posted once by every worker and waited on by the server
sem_t s1;

// old per-worker globals, compiled out; the sums now live in tsk_t
#if 0
int sum1 = 0,
    sum2 = 0,
    num1 = 0,
    num2 = 0;
#endif

// file name
// (the fixed-size array form is compiled out in favor of a pointer to a
// string literal)
#if 0
char fileName[10] = "data1.dat";
#else
const char *fileName = "data1.dat";
#endif

// task control
// One record per worker thread; main fills in tno and tid, the worker fills
// in sum, and the server prints tno and sum.
typedef struct {
    pthread_t tid;                  // thread ID written by pthread_create
    int tno;                        // task index; also selects the file slice
    int sum;                        // total computed by the worker
} tsk_t;

// capacity of tsklist; main clamps the requested task count to this
#define TSKMAX  50
int tskmax;                         // actual number of tasks in use
tsk_t tsklist[TSKMAX];              // task records, entries [0, tskmax) used

// loop through all task blocks
// Expands to the three clauses of a for header and declares `tsk` inside the
// loop, so it is written as `for (TSKFORALL)`; because the increment clause
// comes last, a caller can append more increments after a comma.
#define TSKFORALL \
    tsk_t *tsk = &tsklist[0];  tsk < &tsklist[tskmax];  ++tsk

// server routine
void *
server_routine(void *vp)
{
// NOTE/BUG: server does _not_ signal worker
#if 0
    printf("Server sent signal to worker thread 1\n");
    printf("Server sent signal to worker thread 2\n");
#endif

    for (TSKFORALL) {
        printf("Server waiting ...\n");
        sem_wait(&s1);
        printf("Server complete ...\n");
    }

    // print the final results
    for (TSKFORALL)
        printf("The sum of task %d is %d\n",tsk->tno,tsk->sum);

    return (void *) 0;
}

// thread 1 reoutine
void *
worker_routine(void *vp)
{
    FILE *file;
    tsk_t *tsk = vp;

    printf("Thread %d running ...\n",tsk->tno);

    file = fopen(fileName, "r");
    fseek(file,tsk->tno * COUNT * LINEWID,SEEK_SET);

    tsk->sum = 0;

    int num1;
    int first = -1;
    int last = -1;
    for (int i = 0; i < COUNT; i++) {
        if (fscanf(file, "%d", &num1) != 1) {
            printf("Thread %d fscan error\n",tsk->tno);
            break;
        }

        if (i == 0)
            first = num1;
        if (i == (COUNT - 1))
            last = num1;

        tsk->sum += num1;
    }

    printf("sum in thread %d: %d (first %d, last %d)\n",
        tsk->tno, tsk->sum, first, last);
    sem_post(&s1);

#if 1
    fclose(file);
#endif

    return (void *) 0;
}

// main function
// Usage: prog [ntasks]
// Checks the layout of the data file, then starts one server thread and
// tskmax worker threads, joins them all and destroys the semaphore.
//   argv[1] -- optional task count; defaults to 2 and is clamped to 1..TSKMAX
//   returns 0 on success; exits with 95 (sem_init), 96 (fopen) or 97 (newline
//   check / fstat); returns 98 (server create), 1 + task index (worker
//   create), 99 (server join) or 5 (worker join)
int
main(int argc, char **argv)
{

    --argc;
    ++argv;

    setlinebuf(stdout);
    setlinebuf(stderr);

    if (argc != 1)
        tskmax = 2;
    else {
        // strtol saturates instead of overflowing; a zero, negative or
        // non-numeric count would otherwise index outside tsklist
        long want = strtol(*argv,NULL,10);

        if (want < 1)
            want = 1;
        if (want > TSKMAX)
            want = TSKMAX;
        tskmax = (int) want;
    }

    // define threads
    pthread_t server;

    printf("main: %d tasks\n",tskmax);
    printf("main: %d count\n",COUNT);

    FILE *file = fopen(fileName,"r");
    if (file == NULL) {
        printf("main: fopen failure\n");
        exit(96);
    }

    // check alignment: the last byte of the first LINEWID-byte line must be
    // the newline, otherwise the per-task seek offsets are wrong
    char chr = 0;
    if (fseek(file,LINEWID - 1,SEEK_SET) != 0 || fread(&chr,1,1,file) != 1) {
        printf("main: file too short for newline check\n");
        exit(97);
    }
    if (chr != '\n') {
        // %X takes an unsigned value; a plain char could be sign-extended
        printf("main: newline mismatch -- chr=%2.2X\n",(unsigned char) chr);
        exit(97);
    }

    // get the file size
    struct stat st;
    if (fstat(fileno(file),&st) < 0) {
        printf("main: fstat fault\n");
        exit(97);
    }

    // ensure the file has the correct size (diagnostic only, not fatal)
    off_t size = (off_t) tskmax * LINEWID * COUNT;
    if (st.st_size != size)
        printf("main: wrong file size -- st_size=%llu size=%llu\n",
            (unsigned long long) st.st_size,
            (unsigned long long) size);

    fclose(file);

    // initialize the semaphore (pshared 0, initial value 0)
    if (sem_init(&s1, 0, 0) != 0) {
        printf("main: sem_init failure\n");
        exit(95);
    }

    // set the offsets
    int tno = 0;
    for (TSKFORALL, ++tno)
        tsk->tno = tno;

    if (pthread_create(&server, NULL, &server_routine, NULL) != 0)
        return 98;

    for (TSKFORALL) {
        if (pthread_create(&tsk->tid,NULL,worker_routine,tsk) != 0)
            return 1 + tsk->tno;
    }

    if (pthread_join(server, NULL) != 0) {
        return 99;
    }

    for (TSKFORALL) {
        if (pthread_join(tsk->tid, NULL) != 0) {
            return 5;
        }
    }

    // destroy semaphores
    // sem_close is only for named semaphores obtained from sem_open; one set
    // up with sem_init is released with sem_destroy
    sem_destroy(&s1);

    // end
    return 0;
}

Here is the output of the perl script that I used to generate the data:

number of tasks 2
element count per task 500
line separater 0A
section 0 sum 124750
section 1 sum 125250

Here is the program output:

main: 2 tasks
Server waiting ...
Thread 0 running ...
Thread 1 running ...
sum in thread 1: 125250 (first 1, last 500)
sum in thread 0: 124750 (first 0, last 499)
Server complete ...
Server waiting ...
Server complete ...
The sum of task 0 is 124750
The sum of task 1 is 125250

Here is the perl script:

#!/usr/bin/perl
# gendata -- generate data
#
# arguments:
#   1 - number of tasks (DEFAULT: 2)
#   2 - number of items / task (DEFAULT: 500)
#   3 - line separater (DEFAULT: \n)

master(@ARGV);
exit(0);

# master -- top-level driver
# Takes up to three optional arguments (task count, items per task, line
# separator), echoes the settings to STDERR, writes the data lines to STDOUT
# and reports each section's sum on STDERR.
sub master
{
    my(@args) = @_;

    # each setting falls back to its default when the argument is absent
    my $tskmax = shift(@args) // 2;
    printf(STDERR "number of tasks %d\n",$tskmax);

    my $count = shift(@args) // 500;
    printf(STDERR "element count per task %d\n",$count);

    my $sep = shift(@args) // "\n";

    # show the separator as hex bytes so LF vs CRLF is visible
    printf(STDERR "line separater");
    printf(STDERR " %2.2X",ord($_)) foreach split(//,$sep);
    printf(STDERR "\n");

    # section N holds the consecutive values N, N+1, ... ($count of them)
    my $itsk = 0;
    while ($itsk < $tskmax) {
        my $val = $itsk;
        my $sum = 0;

        my $lno = 1;
        while ($lno <= $count) {
            printf("%4d%s",$val,$sep);
            $sum += $val;
            ++$lno;
            ++$val;
        }

        printf(STDERR "section %d sum %d\n",$itsk,$sum);
        ++$itsk;
    }
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM