简体   繁体   English

使用 C 和 OpenMP 进行并行编程

[英]Parallel programming with C and OpenMP

I am trying to convert this program to OpenMP, but I am not able to convert it properly; any help would be appreciated.我正在尝试将这个程序转换为 OpenMP,但无法正确转换,任何帮助都将不胜感激。 I can now convert a simple loop to OpenMP, but not code that involves functions.我现在可以将简单循环转换为 OpenMP,但还不会处理包含函数的代码。 So I am trying to learn how it works.所以我想了解它是如何工作的。 Thanks!谢谢!

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define FILTERLENGTH 960
#define TRACE_LENGTH 16384
#define TRACE_COUNT 32
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)

float f[FILTERLENGTH];
float d[TRACE_COUNT][TRACE_SPACE];
float out[TRACE_COUNT][TRACE_LENGTH];

void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);

/*
 * Benchmark driver as posted in the question: fills the filter
 * coefficients f, fills each trace in d with random samples surrounded by
 * zero padding, times one call to filter(), and prints the problem sizes
 * and the elapsed time to stderr.
 *
 * NOTE(review): the whole body is enclosed in a single
 * `#pragma omp parallel` region, so every thread of the team executes all
 * of it -- the initialisation, the filter() call, the timing and the
 * fprintf calls -- instead of sharing the work.
 */
int main(int argc, char *argv[])
{
    #pragma omp parallel
    {
        /* Declared inside the parallel region, so each thread gets its own
           copies. nc and id are never used. */
        int i, j, k, nc, id;
        struct timeval start, stop, elapse;

        float fmax = (float)RAND_MAX;
        /* NOTE(review): `parallel for` inside an existing parallel region
           opens a nested region; it does not divide the iterations among
           the threads that are already running. When nested parallelism
           is disabled, each thread ends up running every iteration. */
        #pragma omp parallel for
        for (k = 0; k < FILTERLENGTH; k++)
            f[k] = k - (FILTERLENGTH - 1) / 2.0;
        for (j = 0; j < TRACE_COUNT; j++)
        {
            /* Leading FILTERLENGTH / 2 samples of the trace are zero padding. */
            #pragma omp parallel for
            for (i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            /* TRACE_LENGTH samples of data scaled by RAND_MAX.
               NOTE(review): every thread calls rand() and writes the shared
               array d here; rand() is not required to be thread-safe. */
            for (i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;
            /* Remaining samples up to TRACE_SPACE are zero padding. */
            for (i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }
        /* Wall-clock timing of a single filter() call (per thread, since
           this code is still inside the parallel region). */
        gettimeofday(&start, NULL);
        filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &elapse);

        fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
        fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
        fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
        fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
    }
}

/*
 * Apply an FL-tap filter to each of NT traces.
 *
 * For every trace j and every output sample i in [0, TL):
 *
 *     out[j][i] = sum over k in [0, FL) of filt[k] * traces[j][i + k]
 *
 * Parameters:
 *   FL      number of filter coefficients
 *   filt    the FL filter coefficients
 *   NT      number of traces (rows of traces and out)
 *   TL      number of output samples per trace
 *   traces  input samples, NT rows of TL + FL values each; output sample i
 *           reads indices i .. i + FL - 1 of its row
 *   out     result, NT rows of TL values; every element is overwritten
 *
 * The products are accumulated in a local variable and stored once, so
 * the result does not depend on the previous contents of out.
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL])
{
    for (int j = 0; j < NT; j++) {
        for (int i = 0; i < TL; i++) {
            float acc = 0.0f;

            for (int k = 0; k < FL; k++) {
                acc += filt[k] * traces[j][i + k];
            }
            out[j][i] = acc;
        }
    }
}

There are some issues with your code: in the function main, the iterations of the loops are not being assigned to threads as you intended.您的代码存在一些问题:在函数 main 中,循环的迭代并没有按照您的意图分配给各个线程。 Because you have added the parallel clause again to #pragma omp for, and assuming that nested parallelism is disabled (which it is by default), each of the threads created in the outer parallel region will execute the code within that region "sequentially", i.e. every thread runs all of the iterations itself.因为您在 #pragma omp for 上又加了一次 parallel 子句,并且假设嵌套并行被禁用(默认即是如此),外部并行区域中创建的每个线程都会“顺序地”执行该区域内的代码,也就是说每个线程都会自己执行全部迭代。 For more detail about it, read this SO thread.有关更多详细信息,请阅读此 SO 帖子。

Besides that, this code:除此之外,这段代码:

        /* Fill every trace: zero padding, random samples, zero padding.
           Only the first zero-fill loop carries a work-sharing directive;
           the other two inner loops have none, so inside a parallel region
           each thread that reaches them runs all of their iterations. */
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            #pragma omp for
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;

            for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;
            /* Loop index declared here like in the sibling loops, so the
               snippet does not rely on an outer `i`. */
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }

can be improved by moving the #pragma omp for to the outer loop:可以通过将 #pragma omp for 移到外层循环来改进:

        /* Zero the leading and trailing padding of every trace. Each
           iteration of the outer loop touches only row j of d, so the
           rows are shared among the threads of the team. */
        #pragma omp for
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }

        /* rand() keeps hidden state and is not required to be safe to call
           from several threads at once, so one thread draws all samples. */
        #pragma omp single
        for (int j = 0; j < TRACE_COUNT; j++)
            for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
                d[j][i] = rand() / fmax;

And the scope of the parallel region should be reduced.并且应该缩小并行区域的范围。 Everything put together:全部放在一起:

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define FILTERLENGTH 960
#define TRACE_LENGTH 16384
#define TRACE_COUNT 32
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)

float f[FILTERLENGTH];
float d[TRACE_COUNT][TRACE_SPACE];
float out[TRACE_COUNT][TRACE_LENGTH];

void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);

/*
 * Benchmark driver: initialises the filter coefficients f and the traces
 * d, times one call to filter(), and reports the problem sizes and the
 * elapsed wall-clock time on stderr.
 *
 * Each trace row of d is laid out as FILTERLENGTH / 2 zeros, TRACE_LENGTH
 * samples of rand() / RAND_MAX, then zeros up to TRACE_SPACE.
 *
 * Only the initialisation that is safe to run concurrently is placed in
 * the parallel region. The random samples are drawn serially because
 * rand() is not required to be thread-safe.
 */
int main(int argc, char *argv[])
{
    (void)argc; /* command-line arguments are not used */
    (void)argv;

    const float fmax = (float)RAND_MAX;

    #pragma omp parallel
    {
        /* nowait: the two loops write different arrays (f and d), so no
           barrier is needed between them; the end of the parallel region
           still synchronises all threads. */
        #pragma omp for nowait
        for (int k = 0; k < FILTERLENGTH; k++)
            f[k] = k - (FILTERLENGTH - 1) / 2.0;

        /* Zero padding at both ends of every trace; rows are independent. */
        #pragma omp for nowait
        for (int j = 0; j < TRACE_COUNT; j++)
        {
            for (int i = 0; i < FILTERLENGTH / 2; i++)
                d[j][i] = 0;
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
                d[j][i] = 0;
        }
    }

    /* Serial on purpose: concurrent rand() calls would race on its state. */
    for (int j = 0; j < TRACE_COUNT; j++)
        for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
            d[j][i] = rand() / fmax;

    struct timeval start, stop, elapse;
    gettimeofday(&start, NULL);
    filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
    gettimeofday(&stop, NULL);
    timersub(&stop, &start, &elapse);

    fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
    fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
    fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
    fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);

    return 0;
}

You can still try to parallelize the function filter.您仍然可以尝试并行化函数 filter。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM