简体   繁体   中英

Parallel programming with C and OpenMP

I am trying to convert this program to OpenMP, but I am not able to do it properly; any help would be appreciated. I can now convert a simple loop to OpenMP, but not code that calls functions, so I am trying to learn how that works. Thanks!

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define FILTERLENGTH 960
#define TRACE_LENGTH 16384
#define TRACE_COUNT 32
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)

float f[FILTERLENGTH];
float d[TRACE_COUNT][TRACE_SPACE];
float out[TRACE_COUNT][TRACE_LENGTH];

void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);

/*
 * Fill the filter coefficients f and the padded traces d, call filter() once,
 * and print the problem sizes and the elapsed time of that call to stderr.
 *
 * The entire body sits inside a single `omp parallel` region, so every thread
 * of the team executes all of it: the initialisation, the call to filter(),
 * the timing and the fprintf calls.
 */
int main(int argc, char *argv[])
{
    #pragma omp parallel
    {
        /* Declared inside the parallel region, so each thread has its own
         * copy of these variables. nc and id are never used. */
        int i, j, k, nc, id;
        struct timeval start, stop, elapse;

        float fmax = (float)RAND_MAX;
        /* `parallel for` inside an already-parallel region starts a nested
         * parallel region; it does not share the iterations among the
         * threads of the enclosing region. */
        #pragma omp parallel for
        for (k = 0; k < FILTERLENGTH; k++)
            f[k] = k - (FILTERLENGTH - 1) / 2.0; /* linear ramp symmetric about zero */
        /* Not a worksharing loop: each thread of the outer region runs all
         * TRACE_COUNT iterations and writes the same elements of d. */
        for (j = 0; j < TRACE_COUNT; j++)
        {
            /* Leading zero padding: FILTERLENGTH / 2 samples (again a nested
             * `parallel for`, see above). */
            #pragma omp parallel for
            for (i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            /* TRACE_LENGTH samples of rand() / RAND_MAX; rand() is called by
             * every thread of the outer region. */
            for (i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;
            /* Trailing zero padding up to TRACE_SPACE. */
            for (i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }
        /* Each thread times its own call to filter() and prints its own
         * report below. */
        gettimeofday(&start, NULL);
        filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &elapse);

        fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
        fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
        fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
        /* Seconds plus microseconds, printed as one value in seconds. */
        fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
    }
}

/*
 * Apply an FL-tap filter to each of NT traces.
 *
 * For every trace j and output sample i:
 *
 *     out[j][i] = sum over k in [0, FL) of filt[k] * traces[j][i + k]
 *
 * FL     - number of filter coefficients
 * filt   - the FL coefficients
 * NT     - number of traces (rows of `traces` and `out`)
 * TL     - number of output samples per trace
 * traces - input, NT rows of TL + FL samples each
 * out    - result, NT rows of TL samples each; every element is overwritten
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL])
{
    for (int j = 0; j < NT; j++)
    {
        for (int i = 0; i < TL; i++)
        {
            /* Sum into a local so that all FL taps contribute; assigning each
             * product straight to out[j][i] would keep only the last one. */
            float sum = 0.0f;
            for (int k = 0; k < FL; k++)
                sum += filt[k] * traces[j][i + k];
            out[j][i] = sum;
        }
    }
}

There are some issues with your code. In the function main, the iterations of the loops are not being distributed among the threads as you intended. Because you wrote #pragma omp parallel for inside a region that is already parallel (instead of a plain #pragma omp for), each of those directives starts a new, nested parallel region. Assuming that nested parallelism is disabled, which it is by default, each thread created by the outer parallel region will execute the whole loop "sequentially" by itself. For more detail, read this SO thread.

Besides that this code:

        /* Every thread of the enclosing parallel region runs the whole j loop;
         * only the first inner loop is a worksharing loop. */
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            /* Iterations of this loop alone are divided among the threads. */
            #pragma omp for
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;

            /* These two loops are executed in full by every thread. */
            for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }

can be improved by moving the #pragma omp for to the outer loop:

        /* Worksharing on the trace index: the iterations of j are divided
         * among the threads of the enclosing parallel region (not shown in
         * this fragment), and the three inner loops for a given j all run on
         * the thread that was given that j. */
        #pragma omp for
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            /* Leading zero padding. */
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            /* NOTE(review): rand() is now called from several threads at
             * once; the C standard does not require rand() to be free of
             * data races -- confirm this is acceptable for the test data. */
            for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;
            /* Trailing zero padding. */
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }

And the scope of the parallel region should be reduced. Everything put together:

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define FILTERLENGTH 960
#define TRACE_LENGTH 16384
#define TRACE_COUNT 32
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)

float f[FILTERLENGTH];
float d[TRACE_COUNT][TRACE_SPACE];
float out[TRACE_COUNT][TRACE_LENGTH];

void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);

/*
 * Build the filter coefficients f and the zero-padded traces d, run filter()
 * once, and report the problem sizes and the wall-clock time of that single
 * call on stderr.
 *
 * Only the initialisation that is safe to share among threads is done inside
 * the parallel region. The random samples are generated serially, and the
 * timing and reporting are done once, by the initial thread.
 */
int main(int argc, char *argv[])
{
    const float fmax = (float)RAND_MAX;

    #pragma omp parallel
    {
        /* The two worksharing loops write to different arrays, so no barrier
         * is needed between them (nowait). The implicit barrier at the end
         * of the parallel region still ensures both have finished before the
         * code below runs. */
        #pragma omp for nowait
        for (int k = 0; k < FILTERLENGTH; k++)
            f[k] = k - (FILTERLENGTH - 1) / 2.0;

        /* Zero padding on both sides of every trace; each thread handles
         * whole rows of d. */
        #pragma omp for nowait
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }
    }

    /* rand() keeps hidden shared state and is not required to be safe to
     * call from several threads at once, so the samples are drawn by one
     * thread only. This also yields the same data as the sequential program
     * on every run. */
    for (int j = 0; j < TRACE_COUNT; j++)
    {
        for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
            d[j][i] = rand() / fmax;
    }

    struct timeval start, stop, elapse;
    gettimeofday(&start, NULL);
    filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
    gettimeofday(&stop, NULL);
    timersub(&stop, &start, &elapse);

    fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
    fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
    fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
    /* Seconds plus microseconds, printed as one value in seconds. */
    fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);

    return 0;
}

You can still try to parallelize the function filter.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM