[英]Parallel programming with C and OpenMP
我正在嘗試將這段代碼轉換為 OpenMP 版本,但一直無法正確轉換,任何幫助將不勝感激。 我現在可以把簡單的循環轉換為 OpenMP,但涉及函數調用時就不行了,所以想弄清楚它是如何工作的。 謝謝!
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Number of coefficients in the filter f[]. */
#define FILTERLENGTH 960
/* Number of output samples per trace (row length of out[]). */
#define TRACE_LENGTH 16384
/* Number of traces (row count of d[] and out[]). */
#define TRACE_COUNT 32
/*
 * Stored samples per input trace: TRACE_LENGTH data samples plus
 * FILTERLENGTH extra slots, which main() zero-fills as padding
 * (FILTERLENGTH / 2 at the start and the remainder at the end of each row).
 */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)
/* Filter coefficients; filled in by main(). */
float f[FILTERLENGTH];
/* Input traces, one per row; filled in by main(). */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter output, one row per trace; passed to filter() as its result array. */
float out[TRACE_COUNT][TRACE_LENGTH];
/*
 * Filters NT traces of TL output samples each with the FL-coefficient
 * filter filt, reading from traces and writing to out. Defined below.
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);
/*
 * Test driver: fills the filter f[] and the traces d[], times one call to
 * filter(), and prints the problem sizes and the elapsed time to stderr.
 *
 * NOTE(review): the entire body sits inside a single `#pragma omp parallel`
 * region, so every thread of the team executes all of it -- the
 * initialization loops, the filter() call, the timing and the fprintf()
 * calls alike.
 */
int main(int argc, char *argv[])
{
#pragma omp parallel
{
/* Declared inside the parallel region, so each thread has its own copies.
 * nc and id are never used. */
int i, j, k, nc, id;
struct timeval start, stop, elapse;
/* Divisor used to scale rand() results. */
float fmax = (float)RAND_MAX;
/*
 * NOTE(review): `parallel for` inside an already-parallel region opens a
 * nested parallel region instead of sharing the iterations among the
 * existing threads. With nested parallelism disabled, each outer thread
 * runs the whole loop by itself, so all threads write the same elements of
 * the shared array f[].
 */
#pragma omp parallel for
for (k = 0; k < FILTERLENGTH; k++)
f[k] = k - (FILTERLENGTH - 1) / 2.0;
/* Build each trace: leading zero padding, random samples, trailing zero padding. */
for (j = 0; j < TRACE_COUNT; j++)
{
/* The pragma below covers only the first inner loop (same nesting caveat
 * as above); the two loops after it are plain serial loops that every
 * thread executes in full. */
#pragma omp parallel for
for (i = 0; i < FILTERLENGTH / 2; i++)
d[j][i] = 0;
/* NOTE(review): rand() is called by every thread here; the C standard does
 * not require rand() to be free of data races. */
for (i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
d[j][i] = rand() / fmax;
for (i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
d[j][i] = 0;
}
/* Time a single filter() call; elapse = stop - start. */
gettimeofday(&start, NULL);
filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
gettimeofday(&stop, NULL);
timersub(&stop, &start, &elapse);
fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
/* Elapsed time in seconds: whole seconds plus microseconds scaled by 1e-6. */
fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
}
}
/*
 * Apply an FL-coefficient filter to NT traces.
 *
 * For every trace j and output sample i:
 *     out[j][i] = sum over k in [0, FL) of filt[k] * traces[j][i + k]
 *
 * FL      number of coefficients in filt
 * filt    filter coefficients
 * NT      number of traces (rows of traces and out)
 * TL      number of output samples per trace
 * traces  input, NT rows of TL + FL samples; output sample i reads
 *         samples i .. i + FL - 1 of its row
 * out     result, NT rows of TL samples; every element is overwritten
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL])
{
    for (int j = 0; j < NT; j++) {
        for (int i = 0; i < TL; i++) {
            /* Accumulate in a local so that all FL taps contribute to the
             * output sample; assigning each product directly to out[j][i]
             * would keep only the last tap. */
            float acc = 0.0f;
            for (int k = 0; k < FL; k++) {
                acc += filt[k] * traces[j][i + k];
            }
            out[j][i] = acc;
        }
    }
}
您的代碼存在一些問題。 在函數 main 中,循環的迭代並沒有按照您的意願分配給各個線程。 因為您在 #pragma omp for 上又加了一次 parallel 子句,而嵌套並行默認是禁用的,所以在外層並行區域中創建的每個線程都會各自“按順序”執行該區域內的全部代碼。 有關它的更多詳細信息,請閱讀相關的 SO 討論串。
除此之外,這段代碼:
/*
 * Trace initialization with the worksharing directive on the first inner
 * loop only: the j loop and the two inner loops that follow the
 * workshared one are executed in full by every thread that reaches
 * this code.
 */
for (int j = 0; j < TRACE_COUNT; j++)
{
#pragma omp for
    for (int i = 0; i < FILTERLENGTH / 2; i++)
        d[j][i] = 0;
    for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
        d[j][i] = rand() / fmax;
    /* Declare the index here like the sibling loops do, so the snippet does
     * not depend on an `i` declared somewhere outside it. */
    for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
        d[j][i] = 0;
}
可以通過將 #pragma omp for 移到外層循環來改進:
/*
 * Worksharing over traces: the iterations of the j loop are divided among
 * the threads of the enclosing parallel region, and each iteration
 * initializes one whole row of d[] (zero padding, random samples, zero
 * padding).
 *
 * NOTE(review): rand() is called concurrently by the threads sharing this
 * loop. The C standard does not require rand() to be free of data races --
 * confirm the platform's rand() is thread-safe, or generate the samples
 * outside the parallel loop.
 */
#pragma omp for
for (int j = 0; j < TRACE_COUNT; j++)
{
for (int i = 0; i < FILTERLENGTH / 2; i++)
d[j][i] = 0;
for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
d[j][i] = rand() / fmax;
for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
d[j][i] = 0;
}
並且應該縮小並行區域的範圍(scope)。 全部整合在一起:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Length of the filter coefficient array f[]. */
#define FILTERLENGTH 960
/* Output samples produced per trace (row length of out[]). */
#define TRACE_LENGTH 16384
/* How many traces are processed (rows of d[] and out[]). */
#define TRACE_COUNT 32
/*
 * Row length of d[]: TRACE_LENGTH samples plus FILTERLENGTH padding slots
 * that main() sets to zero (FILTERLENGTH / 2 in front, the rest behind).
 */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)
/* Filter coefficients, initialized in main(). */
float f[FILTERLENGTH];
/* Input traces, one row each, initialized in main(). */
float d[TRACE_COUNT][TRACE_SPACE];
/* Output rows handed to filter() as its result array. */
float out[TRACE_COUNT][TRACE_LENGTH];
/*
 * Runs the FL-coefficient filter filt over NT traces of TL output samples,
 * reading traces and writing out. The definition is not part of this
 * listing.
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);
/*
 * Test driver: builds the filter f[] and the input traces d[], times one
 * call to filter(), and reports the problem sizes and the elapsed time on
 * stderr.
 *
 * argc and argv are not used. Returns 0.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    const float fmax = (float)RAND_MAX;

    /*
     * Draw the random samples serially. rand() keeps hidden state that is
     * shared by all threads and the C standard does not require it to be
     * free of data races, so it is kept out of the worksharing loops below.
     * Filling in j-then-i order here also makes the data independent of the
     * number of threads and of the loop schedule.
     */
    for (int j = 0; j < TRACE_COUNT; j++) {
        for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++) {
            d[j][i] = rand() / fmax;
        }
    }

#pragma omp parallel
    {
        /*
         * The two worksharing loops write disjoint data (f[] versus the
         * padding of d[]), so `nowait` lets a thread move on without a
         * barrier in between. The implicit barrier at the end of the
         * parallel region guarantees both are complete before filter()
         * runs.
         */
#pragma omp for nowait
        for (int k = 0; k < FILTERLENGTH; k++) {
            f[k] = k - (FILTERLENGTH - 1) / 2.0;
        }

        /* One trace per iteration: zero the padding before and after the samples. */
#pragma omp for nowait
        for (int j = 0; j < TRACE_COUNT; j++) {
            for (int i = 0; i < FILTERLENGTH / 2; i++) {
                d[j][i] = 0;
            }
            for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++) {
                d[j][i] = 0;
            }
        }
    }

    /* Time a single filter() call; elapse = stop - start. */
    struct timeval start, stop, elapse;
    gettimeofday(&start, NULL);
    filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
    gettimeofday(&stop, NULL);
    timersub(&stop, &start, &elapse);

    fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
    fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
    fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
    /* Seconds plus microseconds scaled by 1e-6. */
    fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);

    return 0;
}
您仍然可以嘗試並行化函數 filter。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.