[英]Should Eigen dense matrix * dense vector multiplication be 7 times slower than GSL?
The answer to this question of mine made me expect that (for matrices with 1/4 of non-vanishing entries) in Eigen the product Dense Matrix * Dense Vector should outperform Sparse Matrix * Dense Vector.我的这个问题的答案让我期待(对于有 1/4 非零元素的矩阵)在 Eigen 中,稠密矩阵 * 稠密向量的乘积应该比稀疏矩阵 * 稠密向量的乘积更快。
Not only do I see the opposite, but also both are outperformed by GSL by a factor of 7 and 4 respectively.我不仅看到了相反的情况,而且两者都比 GSL 慢,分别慢了 7 倍和 4 倍。
Am I using Eigen incorrectly?我是否错误地使用了 Eigen? Am I timing carelessly?
我的计时方式是否不够严谨? I am very startled.
我很吃惊。
My compile options read:我的编译选项如下:
ludi@ludi-M17xR4:~/Desktop/tests$ g++ -o eigenfill.x eigenfill.cc -L/usr/local/lib -lgsl -lgslcblas && ./eigenfill.x
ludi@ludi-M17xR4:~/Desktop/tests$ g++ -o eigenfill.x eigenfill.cc -L/usr/local/lib -lgsl -lgslcblas && ./eigenfill.x
My code reads:我的代码如下:
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <Eigen/Sparse>
#include <Eigen/Dense>
#include <gsl/gsl_matrix.h>
#include <sys/time.h>
#include <gsl/gsl_blas.h>
/* Problem-size constants. Every macro that expands to an expression is
   parenthesized so it stays intact inside a larger expression
   (e.g. x / rows or x % filling). */
#define helix 100            /* base size; rows is helix squared */
#define rows (helix*helix)   /* number of matrix rows */
#define cols rows            /* same value as rows, i.e. a square matrix */
#define filling (rows/4)     /* one quarter of rows */
#define REPS 10              /* repetition count */
using namespace Eigen;
/*-- Forward declarations of the helpers defined below main --*/
/* Inserts the non-zero vee(i,j) values into the sparse matrix. */
int FillSparseMatrix(SparseMatrix<double> & mat);
/* Writes the non-zero vee(i,j) values into the Eigen and the GSL dense matrix. */
int FillDenseMatrices(MatrixXd & Mat, gsl_matrix *testmat);
/* Matrix entry generator: 1.0 when i is a multiple of 4, otherwise 0.0. */
double vee(int i, int j);
/* NOTE(review): v1 is declared by value here, so the callee receives a copy;
   only the GSL vector behind the v2 pointer is shared with the caller. */
int set_vectors_randomly(gsl_vector * v2, VectorXd v1);
int main()
{
int rep;
struct timeval tval_before, tval_after, tval_result;
gsl_matrix *testmat = gsl_matrix_calloc(rows, cols);
gsl_vector *v2 =gsl_vector_calloc(cols);
gsl_vector *prod =gsl_vector_calloc(cols);
SparseMatrix<double> mat(rows,cols); // default is column major
MatrixXd Mat(rows,cols); // default is column major
VectorXd v1(cols), vv1(cols);
FillSparseMatrix(mat);
FillDenseMatrices(Mat, testmat);
printf("\n/*--- --- --- ---*/\n");
for(rep=0;rep<REPS;rep++)
{
set_vectors_randomly(v2, v1);
gettimeofday(&tval_before, NULL);
vv1 = mat*v1;
gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result);
printf("Time for one product, SPARSE EIGEN / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);
gettimeofday(&tval_before, NULL);
gsl_blas_dgemv( CblasNoTrans,1.0, testmat, v2, 0.0, prod);
gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result);
printf("Time for one product, GSL / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);
gettimeofday(&tval_before, NULL);
vv1 = Mat*v1;
gettimeofday(&tval_after, NULL);
timersub(&tval_after, &tval_before, &tval_result);
printf("Time for one product, DENSE EIGEN / secs: %ld.%06ld\n", (long int)tval_result.tv_sec, (long int)tval_result.tv_usec);
printf("/*--- --- --- ---*/\n\n");
//std::cout << mat << std::endl;
}
gsl_matrix_free(testmat);
printf("--- --- --->DONE\n");
return(0);
}
/*-- --*/
/*
 * Populates mat with every non-zero value vee(i,j) for 0 <= i < rows and
 * 0 <= j < cols, printing a progress line every 2500 rows.
 *
 * mat: sparse matrix to fill; entries are added with insert(), so the
 *      visited coefficients must not already be stored in it.
 * Returns 0.
 */
int FillSparseMatrix(SparseMatrix<double> &mat)
{
    // Reserve `filling` slots for each of the `cols` inner vectors up front.
    mat.reserve(Eigen::VectorXi::Constant(cols,filling));
    printf("Filling Sparse Matrix ...");
    for(int i=0;i<rows;i++)
    {
        if(i%2500==0){printf("i= %i\n", i);}
        for(int j=0;j<cols;j++)
        {
            // Evaluate the generator once and reuse the value.
            const double v_ij = vee(i,j);
            if (v_ij != 0){mat.insert(i,j) = v_ij; /*alternative: mat.coeffRef(i,j) += v_ij;*/ }
        }
    }
    // reserve() leaves the matrix in uncompressed mode; switch to the
    // compressed storage format now that all insertions are done.
    mat.makeCompressed();
    return(0);
}
/*-- --*/
/*-- --*/
/*
 * Writes vee(i,j) into entry (i,j) of both dense matrices for 0 <= i < rows
 * and 0 <= j < cols, printing a progress line every 2500 rows.
 *
 * Mat:     Eigen dense matrix, at least rows x cols.
 * testmat: GSL dense matrix, at least rows x cols.
 * Returns 0.
 */
int FillDenseMatrices(MatrixXd &Mat, gsl_matrix * testmat)
{
    printf("Filling Dense Matrix ...");
    for(int i=0;i<rows;i++)
    {
        if(i%2500==0){printf("i= %i\n", i);}
        for(int j=0;j<cols;j++)
        {
            const double aux = vee(i,j);
            // Store zeros as well: a MatrixXd built with only its dimensions
            // has uninitialised coefficients, so skipping the zero entries
            // would leave arbitrary values in Mat.
            Mat(i,j) = aux;
            gsl_matrix_set(testmat, i, j, aux);
        }
    }
    return(0);
}
/*-- --*/
/*
 * Matrix entry generator: yields 1.0 when the row index i is a multiple of 4
 * and 0.0 otherwise. The column index j does not influence the result.
 */
double vee(int i, int j)
{
    const bool multiple_of_four = (i % 4 == 0);
    return multiple_of_four ? 1.0 : 0.0;
}
/*-- --*/
/*
 * Draws `cols` numbers from drand48() and stores the k-th one at index k of
 * both v2 and v1, after printing a notice.
 *
 * v2: GSL vector written through the pointer.
 * v1: Eigen vector received BY VALUE - the writes below go to this function's
 *     own copy, which is discarded on return.
 * Returns 0.
 */
int set_vectors_randomly(gsl_vector * v2, VectorXd v1){
    printf("Setting vectors rendomly anew ...\n");
    int idx = 0;
    while (idx < cols)
    {
        const double sample = drand48();
        v1(idx) = sample;
        gsl_vector_set(v2, idx, sample);
        ++idx;
    }
    return(0);
}
/*-- --*/
With Eigen, performance is abysmal when compiling without compiler optimizations.使用 Eigen,在没有编译器优化的情况下编译时性能非常糟糕。 There are several ways to increase performance dramatically:
有几种方法可以显着提高性能:
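Turn on compiler optimizations (for example -O2 or -O3 with g++).打开编译器优化(例如 g++ 的 -O2 或 -O3)。
An additional (smaller) speedup can be obtained by defining NDEBUG before including the Eigen library.在包含 Eigen 库之前定义 NDEBUG 可以获得额外的(较小的)加速。 This disables bounds checking, so make sure there are no issues before enabling this feature.
这会禁用边界检查,因此在启用此功能之前请确保代码没有问题。
Actually, SparseMatrix is column-major by default, which is not well suited to a product with a vector.实际上,SparseMatrix 默认是列主序(column-major),不太适合与向量相乘。 Use SparseMatrix<double, RowMajor> and you will find it faster.
使用 SparseMatrix<double, RowMajor>,你会发现它更快。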
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.