简体   繁体   English

使用线程后矩阵求逆反而更慢

[英]Matrix inversion slower using threads

I wrote a function that computes the inverse of a matrix and then a multithreaded version of it, because I need to invert matrices larger than 2000 x 2000. Inverting a 1000x1000 matrix takes 2.5 seconds unthreaded (on an i5-4460, 4 cores, 2.9 GHz) but 7.25 seconds multithreaded. 我写了一个矩阵求逆函数,随后又写了一个多线程版本,因为我需要对大于 2000 x 2000 的矩阵求逆。对 1000x1000 的矩阵求逆,非线程版本耗时 2.5 秒(i5-4460,4 核,2.9 GHz),而多线程版本耗时 7.25 秒。

I put the multithreading in the part that consumes the most time. 我把多线程放在了最耗时的部分。 What is wrong? 哪里出了问题? Is it because vectors are used instead of 2-dimensional arrays? 是因为使用了 vector 而不是二维数组吗?

This is the minimal code to test both versions: 这是测试两个版本的最小代码:

#include <stdlib.h>
#include <time.h>

#include <chrono>
#include <cmath>
#include <cstdio>
#include <functional>
#include <iostream>
#include <thread>
#include <vector>
// Number of worker threads inverse_th() starts for each elimination step
// ("nucleos" is Spanish for "cores").
const int NUCLEOS = 8;

// Picks the clock type (t_clock) used by the timer() helper, depending on the platform.
#ifdef __linux__ 
#include <unistd.h>    //usleep()
typedef std::chrono::system_clock t_clock;    //try to use high_resolution_clock on  new linux x64 computer!
#else
typedef std::chrono::high_resolution_clock t_clock;
// NOTE(review): 4996 is presumably the MSVC "deprecated/unsafe function" warning — confirm.
#pragma warning(disable:4996)
#endif
using namespace std;


std::chrono::time_point<t_clock> start_time, stop_time = start_time; char null_char = '\0';
void timer(char *title = 0, int data_size = 1) { stop_time = t_clock::now(); double us = (double)chrono::duration_cast<chrono::microseconds>(stop_time - start_time).count();   if (title) printf("%s time = %7lgms = %7lg MOPs\n", title, (double)us*1e-3, (double)data_size / us); start_time = t_clock::now(); }



// Forward declaration; the definition is further down in this file.
// For every row i in [pos0, pos1) other than ord, subtracts x[i][ord] times
// row ord from row i, in both x and y, over the first dim columns. With
// x[ord][ord] equal to 1 this makes column ord of x zero in those rows.
void colum_zero(vector< vector<double> > &x, vector< vector<double> > &y, int pos0, int pos1,int dim, int ord);

//returns inverse of x, x is not modified, not threaded
vector< vector<double> > inverse(vector< vector<double> > x)
{
    if (x.size() != x[0].size())
    {
        cout << "ERROR on inverse() not square array" << endl; getchar(); return{};//returns a null
    }

    size_t dim = x.size();
    int i, j, ord;
    vector< vector<double> > y(dim,vector<double>(dim,0));//initializes output = 0
    //init_2Dvector(y, dim, dim);
    //1. Unity array y: 
    for (i = 0; i < dim; i++)
    {
        y[i][i] = 1.0;
    }

    double diagon, coef;
    double *ptrx, *ptry, *ptrx2, *ptry2;
    for (ord = 0; ord<dim; ord++)
    {
        //2 Hacemos diagonal de x =1
        int i2;
        if (fabs(x[ord][ord])<1e-15) //If that element is 0, a line that contains a non zero is added
        {
            for (i2 = ord + 1; i2<dim; i2++)
            {
                if (fabs(x[i2][ord])>1e-15) break;
            }
            if (i2 >= dim)
                return{};//error, returns null
            for (i = 0; i<dim; i++)//added a line without 0
            {
                x[ord][i] += x[i2][i];
                y[ord][i] += y[i2][i];
            }
        }
        diagon = 1.0/x[ord][ord];
        ptry = &y[ord][0];
        ptrx = &x[ord][0];
        for (i = 0; i < dim; i++)
        {
            *ptry++ *= diagon;
            *ptrx++ *= diagon;
        }
        //uses the same function but not threaded:
        colum_zero(x,y,0,dim,dim,ord);
    }//end ord
    return y;
}

//threaded version
vector< vector<double> > inverse_th(vector< vector<double> > x)
{
    if (x.size() != x[0].size())
    {
        cout << "ERROR on inverse() not square array" << endl; getchar(); return{};//returns a null
    }

    int dim = (int) x.size();
    int i, ord;
    vector< vector<double> > y(dim, vector<double>(dim, 0));//initializes output = 0
                                                            //init_2Dvector(y, dim, dim);
                                                            //1. Unity array y: 
    for (i = 0; i < dim; i++)
    {
        y[i][i] = 1.0;
    }

    std::thread tarea[NUCLEOS];
    double diagon;
    double *ptrx, *ptry;// , *ptrx2, *ptry2;
    for (ord = 0; ord<dim; ord++)
    {
        //2 Hacemos diagonal de x =1
        int i2;
        if (fabs(x[ord][ord])<1e-15) //If a diagonal element=0 it is added a column that is not 0 the diagonal element
        {
            for (i2 = ord + 1; i2<dim; i2++)
            {
                if (fabs(x[i2][ord])>1e-15) break;
            }
            if (i2 >= dim)
                return{};//error, returns null
            for (i = 0; i<dim; i++)//It is looked for a line without zero to be added to make the number a non zero one to avoid later divide by 0
            {
                x[ord][i] += x[i2][i];
                y[ord][i] += y[i2][i];
            }
        }
        diagon = 1.0 / x[ord][ord];

        ptry = &y[ord][0];
        ptrx = &x[ord][0];
        for (i = 0; i < dim; i++)
        {
            *ptry++ *= diagon;
            *ptrx++ *= diagon;
        }

        int pos0 = 0, N1 = dim;//initial array position
        if ((N1<1) || (N1>5000))
        {
            cout << "It is detected out than 1-5000 simulations points=" << N1 << " ABORT or press enter to continue" << endl; getchar();
        }
        //cout << "Initiation of " << NUCLEOS << " threads" << endl;
        for (int thread = 0; thread<NUCLEOS; thread++)
        {
            int pos1 = (int)((thread + 1)*N1 / NUCLEOS);//next position
            tarea[thread] = std::thread(colum_zero, std::ref(x), std::ref(y), pos0, pos1, dim, ord);//ojo, coil current=1!!!!!!!!!!!!!!!!!!
            pos0 = pos1;//next thread will work at next point
        }
        for (int thread = 0; thread<NUCLEOS; thread++)
        {
            tarea[thread].join();
            //cout << "Thread num: " << thread << " end\n";
        }
    }//end ord
    return y;
}

/**
 * @brief Gauss-Jordan elimination step: clears column `ord` of x in the rows
 *        [pos0, pos1), applying the same row operation to y.
 *
 * For each row i in the range, except the pivot row itself, the value
 * coef = x[i][ord] is read and `coef * row ord` is subtracted from row i in
 * both matrices. Rows whose coef is already below 1e-15 in magnitude are left
 * untouched. The pivot row (index ord) is only read, never written.
 *
 * @param x    Matrix being reduced; rows in [pos0, pos1) are modified in place.
 * @param y    Companion matrix receiving the same row operations.
 * @param pos0 First row to process (inclusive).
 * @param pos1 One past the last row to process.
 * @param dim  Number of columns to update in each row.
 * @param ord  Index of the pivot row/column.
 */
void colum_zero(std::vector< std::vector<double> > &x, std::vector< std::vector<double> > &y, int pos0, int pos1, int dim, int ord)
{
    for (int i = pos0; i < pos1; i++)
    {
        if (i == ord) continue;                 // never modify the pivot row
        const double coef = x[i][ord];          // element to be turned into 0
        if (std::fabs(coef) < 1e-15) continue;  // already zero, nothing to do

        double *row_y = y[i].data();
        double *row_x = x[i].data();
        const double *pivot_y = y[ord].data();
        const double *pivot_x = x[ord].data();
        for (int j = 0; j < dim; j++)
        {
            // Indexed compound assignment instead of `*p++ = *p - ...`: the
            // latter reads and increments the same pointer without sequencing,
            // which is undefined behaviour before C++17.
            row_y[j] -= coef * pivot_y[j];
            row_x[j] -= coef * pivot_x[j];
        }
    }
}


/**
 * @brief Times inverse() against inverse_th() on a random dim x dim matrix.
 *
 * The matrix is filled with rand()-based values in [-10000, 10000]. Both
 * inversions are timed with a monotonic wall clock: clock() reports processor
 * time used by the whole process, which adds up the time of all running
 * threads and therefore makes a multithreaded run look slower than it is.
 *
 * @param dim Number of rows and columns of the test matrix.
 */
void test_6_inverse(int dim)
{
    std::vector< std::vector<double> > vec1(dim, std::vector<double>(dim));
    for (std::vector<double> &row : vec1)
        for (double &value : row)
        {
            value = (-1.0 + 2.0*rand() / RAND_MAX) * 10000;
        }

    typedef std::chrono::steady_clock wall_clock;

    wall_clock::time_point ini = wall_clock::now();
    const std::vector< std::vector<double> > vec2 = inverse(vec1);
    wall_clock::time_point end = wall_clock::now();
    std::cout << "=== Time inverse unthreaded=" << std::chrono::duration<double>(end - ini).count() << std::endl;

    ini = end;
    const std::vector< std::vector<double> > vec3 = inverse_th(vec1);
    end = wall_clock::now();
    std::cout << "=== Time inverse   threaded=" << std::chrono::duration<double>(end - ini).count() << std::endl;

    // Print one element of each result so the compiler cannot drop the
    // computations. Skipped when the element does not exist (dim < 3, or an
    // inversion failed and returned an empty matrix).
    if (vec2.size() > 2 && vec3.size() > 2)
        std::cout << vec2[2][2] << " " << vec3[2][2] << std::endl;
    std::cout << std::endl;
}


/**
 * @brief Runs the inversion benchmark on a 1000 x 1000 matrix, then waits for
 *        a key press before exiting.
 * @return 0 on completion (a non-zero status would signal failure to the caller).
 */
int main()
{
    test_6_inverse(1000);
    std::cout << std::endl << "=== END ===" << std::endl; std::getchar();
    return 0;
}

After looking deeper into the code of the colum_zero() function, I saw that one thread overwrites data that is used by other threads, so the threads are not INDEPENDENT of each other. 在深入研究 colum_zero() 函数的代码后,我发现一个线程会改写其他线程要使用的数据,因此这些线程彼此并不独立。 Fortunately the compiler detects this and avoids it. 幸运的是,编译器检测到了这一点并避免了它。

Conclusions: 结论:

  1. Using the Gauss-Jordan method on its own is not recommended for multithreading 不建议单独使用 Gauss-Jordan 方法来实现多线程
  2. If the multithreaded version turns out to be slower even though the work is split correctly among the threads, it may be because the results of one thread are used by another 如果多线程版本更慢,而工作已正确分配给各个线程,则可能是因为一个线程的结果被另一个线程使用
  3. The main function inverse() works and can be used by other programmers, so this question should not be deleted 主函数 inverse() 可以正常工作,并且可供其他程序员使用,因此不应删除此问题

Unanswered question: Which matrix inversion method can be spread across many independent threads so that it can be used on a GPU? 未回答的问题:哪种矩阵求逆方法可以分散到大量相互独立的线程中,从而在 GPU 上使用?

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM