简体   繁体   中英

How can I parallelize this loop with OpenMP?

I don't know how to parallelize these loops, because they involve a lot of dependent variables and I am very confused. Can you help and guide me? The first one is:

for (int a = 0; a < sigmaLen; ++a) {
        int f = freq[a];
        if (f >= sumFreqLB)
            if (updateRemainingDistances(s, a, pos))
                if (prunePassed(pos + 1)) {
                    lmer[pos] = a;
                    enumerateStrings(pos + 1, sumFreqLB - f);
                }
    }

The second one is :

// For the most recently added string (index stackSz - 1), fills
// dist[k - 1][pairOffset + j] for every earlier string j and every k from L
// down to 1. Each entry is dist[k][pairOffset + j] plus 1 when the two strings
// differ in column k - 1, so dist[L] must already hold valid values.
void preprocessLowerBounds() {
    int i = stackSz - 1;
    // Start of the entries for pairs (j, i) with j < i.
    // NOTE(review): looks like a triangular pair layout - confirm.
    int pairOffset = (i * (i - 1)) >> 1;
    // k must run downwards: level k - 1 is derived from level k.
    for (int k = L; k; --k) {
        int *dsn = dist[k] + pairOffset;
        int *ds = dist[k - 1] + pairOffset;
        int *s = colS[k - 1];
        char ci = s[i];
        for (int j = 0; j < i; ++j) {
            char cj = s[j];
            *ds++ = (*dsn++) + (ci != cj);
        }
    }
}

And another one is:

    // Recursively processes the items of row `rowNumber`.
    //
    // When shouldGenerateNeighborhood() returns true the row is handed to
    // bruteForceIt(). Otherwise each item is pushed with addString(), checked
    // with hasSolution() and getValid(), recursed into on success, and popped
    // again with removeLastString() before the next item is tried.
    //
    // rowNumber          index of the row to process
    // remainQTolerance   forwarded unchanged to getValid() and the recursive call
    //
    // NOTE(review): hasPreprocessedPairs and useQ are used as template
    // arguments below, but the template header is not part of this snippet.
    void enumerateSubStrings(int rowNumber, int remainQTolerance) {
    int nItems = rowSize[rowNumber][stackSz];
    if (shouldGenerateNeighborhood(rowNumber, nItems)) {
        bruteForceIt(rowNumber, nItems);
    } else {
        indexType *row = rowItem[rowNumber];
        for (int j = 0; j < nItems; ++j) {
            indexType ind = row[j];
            // Push the candidate; stackSz is incremented by addString().
            addString(lmers + ind);
            preprocessLowerBounds();
            uint threshold = maxLB[stackSz] - addMaxFreq();
            if (hasSolution(0, threshold)) {
                if (getValid<hasPreprocessedPairs, useQ>(rowNumber + 1,
                        (stackSz <= 2 ? n : smallN), threshold + LminusD,
                        ind, remainQTolerance)) {
                    enumerateSubStrings<hasPreprocessedPairs, useQ>(
                            rowNumber + 1, remainQTolerance);
                }
            }
            // Pop the candidate so the next item starts from the same state.
            removeLastString();
        }
    }
    }

void addString(const char *t) {
    int *mf = colMf[stackSz + 1];
    for (int j = 0; j < L; ++j) {
        int c = t[j];
        colS[j][stackSz] = c;
        mf[j] = colMaxFreq[j] + (colMaxFreq[j] == colFreq[j][c]++);
    }
    colMaxFreq = mf;
    ++stackSz;
}


// For the newest string (index stackSz - 1), derives dist[level - 1] from
// dist[level] for level = L .. 1: the entry for the pair (other, newest) is the
// entry one level up plus 1 when the two strings differ in column level - 1.
void preprocessLowerBounds() {
    const int newest = stackSz - 1;
    // Offset of the first entry belonging to pairs (other, newest).
    const int base = (newest * (newest - 1)) >> 1;

    // Levels must be visited from L downwards, since each one reads the
    // values of the level above it.
    for (int level = L; level != 0; --level) {
        const int *column = colS[level - 1];
        const char newestSym = column[newest];
        const int *src = dist[level] + base;
        int *dst = dist[level - 1] + base;

        for (int other = 0; other < newest; ++other) {
            const char otherSym = column[other];
            dst[other] = src[other] + (newestSym != otherSym);
        }
    }
}

void removeLastString() {
    --stackSz;
    for (int j = 0; j < L; ++j)
        --colFreq[j][colS[j][stackSz]];
    colMaxFreq = colMf[stackSz];
}

OK. For OpenMP to parallelize a loop correctly you basically have to follow two rules: first, never write to the same memory location from different threads; second, never depend on reading a memory location that another thread may modify. In the first loop you only change the lmer variable, and the other operations only read variables that I assume are not being changed at the same time from another part of your code, so the first loop would be as follows:

// `parallel for` both creates the thread team and splits the iterations among
// its threads; a bare `omp for` outside a parallel region runs on one thread.
// s and pos are read inside the loop, so each thread gets an initialized copy
// via firstprivate (plain `private` would leave the copies uninitialized).
// The loop index `a` is declared in the for-statement and is private
// automatically. freq and sumFreqLB are only read here, so they stay shared.
// NOTE(review): assumes s and pos are variables that may appear in a
// data-sharing clause (not, e.g., references) - confirm against their declarations.
#pragma omp parallel for firstprivate(s, pos)
for (int a = 0; a < sigmaLen; ++a) {
    int f = freq[a];
    if (f >= sumFreqLB)
        if (updateRemainingDistances(s, a, pos))
            if (prunePassed(pos + 1)) {

                // Only one thread at a time may perform this write.
                // NOTE(review): lmer is still shared between threads. If
                // enumerateStrings reads or writes lmer, each thread needs
                // its own copy of the buffer - confirm.
                #pragma omp critical
                {
                lmer[pos] = a;
                }
                enumerateStrings(pos + 1, sumFreqLB - f);
            }
}

In the second loop I could not fully understand how you are using the for, but you should have no problems as long as you only read shared data and only modify thread-local variables (and no iteration reads a value written by another iteration).

You must make sure that the functions updateRemainingDistances, prunePassed and enumerateStrings do not use (in particular, do not write to) static or global variables internally.

In the following function you mostly perform read operations, which can safely be done from multiple threads (as long as no thread is modifying those variables), and you write to local memory positions, so you only need to rewrite the for loop into a form that OpenMP can recognize.

// OpenMP version of preprocessLowerBounds().
//
// For the newest string (index stackSz - 1) and every earlier string j, fills
// dist[k - 1][pairOffset + j] for k = L .. 1 as dist[k][pairOffset + j] plus 1
// when the two strings differ in column k - 1.
//
// Level k - 1 is computed from level k, so the k loop carries a dependency and
// must stay sequential. Different values of j touch different elements, so the
// work is split across threads by j instead.
// NOTE(review): for small i the thread start-up cost may outweigh the gain -
// measure before keeping the pragma.
void preprocessLowerBounds() {
    const int i = stackSz - 1;
    const int pairOffset = (i * (i - 1)) >> 1;

    #pragma omp parallel for
    for (int j = 0; j < i; ++j) {
        // Walk the levels in the original order, L down to 1.
        for (int k = L; k; --k) {
            const int *s = colS[k - 1];
            const char ci = s[i];
            const char cj = s[j];
            dist[k - 1][pairOffset + j] = dist[k][pairOffset + j] + (ci != cj);
        }
    }
}

In the last function you call many functions whose source code I cannot see, so I cannot tell whether they are parallelizable. The example below shows some patterns that are wrong inside a parallel loop, together with some that are fine:

// Shared container used by notParalelizable_1.
std::vector<int> myVector;

// Appends i to the global myVector.
// Not safe to call from several threads at once: every call modifies the same
// shared vector.
void notParalelizable_1(int i){
myVector.push_back(i);
}

// Adds i to a function-local static accumulator.
// Not safe to call from several threads at once: the static variable is a
// single object shared by every call.
void notParalelizable_2(int i){
static int total = 0;
total += i;
}

// Global accumulator modified by notParalelizable_3.
int varGlobal = 0;

// Adds i to the global varGlobal.
// Not safe to call from several threads at once: every call reads and writes
// the same global variable.
void notParalelizable_3(int i){
varGlobal += i;
}

// Copies i into a local variable and does nothing else.
// It touches only its own parameter and a local, so there is no state shared
// between calls.
void oneFunctionParalelizable(int i)
{
int localCopy = i;
static_cast<void>(localCopy);
}

// Demonstrates which loop bodies are and are not safe inside an OpenMP
// parallel loop. Every loop uses `parallel for`, which creates the thread team
// and divides the iterations among its threads; a bare `omp for` outside a
// parallel region would run all iterations on a single thread.
// The loops marked WRONG contain data races on purpose.
int main()
{

// WRONG: myVector is modified simultaneously from multiple threads. At best
// the values are not stored in ascending order; concurrent modification of
// the container can also produce erroneous results or run-time errors.
#pragma omp parallel for
 for(int i=0;i<10;i++)
 {
 notParalelizable_1(i);
 }

// WRONG: the static variable A is modified simultaneously from multiple threads.
#pragma omp parallel for
 for(int i=0;i<10;i++)
 {
 notParalelizable_2(i);
 }

// WRONG: varGlobal is modified simultaneously from multiple threads.
#pragma omp parallel for
 for(int i=0;i<10;i++)
 {
 notParalelizable_3(i);
 }

// OK: the function only touches its own parameter and a local variable.
#pragma omp parallel for
 for(int i=0;i<10;i++)
 {
 oneFunctionParalelizable(i);
 }

// OK: each iteration writes to a different memory position.
// A stack array is used so that nothing has to be freed afterwards.
int values[10];

#pragma omp parallel for
 for(int i=0;i<10;i++)
 {
 values[i]=i;
 }

// WRONG: k is shared, so its final value is unreliable because it is modified
// from different threads. Adding reduction(+:k) to the pragma would fix it.
int k=2;
#pragma omp parallel for
for(int i=0;i<10;i++)
{
k=k+i;
}

 return 0;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM