I try to write use magma library in matlab, so basically I write a mexfunction which incorporate c code using magma function and then compile this mexfunction into mexa64 file, thus I could use in matlab.
The mexfunction or source c code is below:(called eig_magma)
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cuda_runtime_api.h>
#include <cublas.h>
// includes, project
#include "flops.h"
#include "magma.h"
#include "magma_lapack.h"
#include "testings.h"
#include <math.h>
#include "mex.h"
#define A(i,j) A[i + j*lda]
extern "C"
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
#define L_OUT plhs[0]
#define A_IN prhs[0]
#define S_OUT plhs[1]
magma_init();
real_Double_t gpu_perf, gpu_time;
double *h_A, *h_R, *h_work, *L,*S;
double *w;
magma_int_t *iwork;
magma_int_t N, n2, info, lwork, liwork, lda, aux_iwork[1];
double aux_work[1];
magma_vec_t jobz = MagmaVec;
magma_uplo_t uplo = MagmaLower;
magma_int_t i,j;
N = mxGetM(A_IN);
lda = N;
n2 = lda*N;
// query for workspace sizes
magma_dsyevd( jobz, uplo,
N, NULL, lda, NULL,
aux_work, -1,
aux_iwork, -1,
&info );
lwork = (magma_int_t) aux_work[0];
liwork = aux_iwork[0];
TESTING_MALLOC_CPU( h_A, double, n2);
h_A = (double *)mxGetData(A_IN);
L_OUT = mxCreateDoubleMatrix(N,N,mxREAL);
L = mxGetPr(L_OUT);
S_OUT = mxCreateDoubleMatrix(N,1,mxREAL);
S = mxGetPr(S_OUT);
//print and check
printf("print out h_A\n");
for(i=0;i<N;i++)
{
for(j=0;j<N;j++)
printf("%f\t",h_A[i+j*lda]);
printf("\n");
}
/* Allocate host memory for the matrix */
TESTING_MALLOC_CPU( w, double, N );
TESTING_MALLOC_CPU( iwork, magma_int_t, liwork );
TESTING_MALLOC_PIN( h_R, double, N*lda );
TESTING_MALLOC_PIN( h_work, double, lwork );
printf("allocate memory on cpu with pin!\n");
printf("copy h_A to h_R, and then print h_R\n");
memcpy( h_R, h_A, sizeof(double)*n2);
// lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda,h_R, &lda );
// print and check
for(i=0;i<N;i++)
{
for(j=0;j<N;j++)
{
printf("%f\t",h_R[i+j*lda]);
}
printf("\n");
}
/* Performs operation using MAGA */
gpu_time = magma_wtime();
printf("start eig\n");
magma_dsyevd( jobz, uplo,
N, h_R, lda, w,
h_work, lwork,
iwork, liwork,
&info );
gpu_time = magma_wtime() - gpu_time;
printf("%d\n",info);
if (info != 0)
printf("magma_dsyevd returned error %d: %s.\n",
(int) info, magma_strerror( info ));
printf("convey the output\n");
memcpy(L,h_R,sizeof(double)*n2);
memcpy(S,w,sizeof(double)*N);
TESTING_FREE_CPU( w );
TESTING_FREE_CPU( iwork );
TESTING_FREE_PIN( h_R );
TESTING_FREE_PIN( h_work);
TESTING_FINALIZE();
}
Because using magma need cuda and magma lib I write the makefile to compile the code into mex file:
# Paths where MAGMA, CUDA, and OpenBLAS are installed
MAGMADIR = /home/yuxin/magma-1.5.0
CUDADIR = /usr/local/cuda
MATLABHOME = /opt/share/MATLAB/R2012b.app/
CC = icpc
LD = icpc
CFLAGS = -Wall
LDFLAGS = -Wall -mkl=parallel
MEX_CFLAGS = -fPIC -ansi -pthread -DMX_COMPAT_32 \
-DMATLAB_MEX_FILE
MAGMA_CFLAGS := -DADD_ -DHAVE_CUBLAS -I$(MAGMADIR)/include -I$(CUDADIR)/include -I$(MAGMADIR)/testing
MAGMA_LIBS := -L$(MAGMADIR)/lib -L$(CUDADIR)/lib64 \
-lmagma -lcublas -lcudart
MEXLIBS := -L/opt/share/MATLAB/R2012b.app/bin/glnxa64 -lmx -lmex -lmat -lm -lstdc++
MEX_INCLU := -I$(MATLABHOME)/extern/include -I$(MATLABHOME)/simulink/include
MEXFLAGS := -shared
OBJECT = eig_magma.o
EXECUTABLE = eig_magma
$(EXECUTABLE): $(OBJECT)
$(CC) $(LDFLAGS) $(MEXFLAGS) $(OBJECT) -o $@ $(MAGMA_LIBS) $(MEXLIBS)
eig_magma.o: eig_magma.c
$(CC) $(CFLAGS) $(MAGMA_CFLAGS) $(MEX_INCLU) $(MEX_CFLAGS) -c $< -o $@
clean:
rm -rf eig_magma *.o eig_magma.mexa64
It could be compiled successfully, however when I run the eig_magma in matlab, and when matlab execute to
magma_dsyevd( jobz, uplo, N, h_R, lda, w, h_work, lwork, iwork, liwork, &info );
matlab crashed.......I also have tried to only write the eig_magma in c, without using mex function, it was compiled successfully and works fine.
Anyone has idea how to solve this problem?
Thank you
yuxin
Let me give an example. Since I never worked with MAGMA before, I'm showing the same code only using regular LAPACK (code adapted from a previous answer I gave). It shouldn't be difficult to convert it to using MAGMA equivalent functions instead.
Note that MATLAB already ships with the BLAS/LAPACK libraries and the necessary header files for you to use from MEX-files. In fact this is the Intel MKL library, so it's a well-optimized implementation.
#include "mex.h"
#include "lapack.h"
/*
// the following prototype is defined in "lapack.h" header
extern void dsyevd(char *jobz, char *uplo,
ptrdiff_t *n, double *a, ptrdiff_t *lda, double *w,
double *work, ptrdiff_t *lwork, ptrdiff_t *iwork, ptrdiff_t *liwork,
ptrdiff_t *info);
*/
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
// validate input
if (nrhs != 1 || nlhs > 2)
mexErrMsgIdAndTxt("mex:error", "Wrong number of arguments.");
if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]))
mexErrMsgIdAndTxt("mex:error", "Input is not real double matrix.");
mwSignedIndex m = mxGetM(prhs[0]), n = mxGetN(prhs[0]);
if (m != n)
mexErrMsgIdAndTxt("mex:error", "Input is not square symmetric matrix.");
// allocate output
plhs[0] = mxDuplicateArray(prhs[0]);
plhs[1] = mxCreateDoubleMatrix(1, n, mxREAL);
double *A = mxGetPr(plhs[0]);
double *w = mxGetPr(plhs[1]);
// query and allocate the optimal workspace size
double workopt = 0;
mwSignedIndex iworkopt = 0;
mwSignedIndex lwork = -1, liwork = -1, info = 0;
dsyevd("Vectors", "Upper", &n, A, &n, w,
&workopt, &lwork, &iworkopt, &liwork, &info);
lwork = (mwSignedIndex) workopt;
liwork = (mwSignedIndex) iworkopt;
double *work = (double*) mxMalloc(lwork * sizeof(double));
mwSignedIndex *iwork = (mwSignedIndex*) mxMalloc(liwork * sizeof(mwSignedIndex));
// compute eigenvectors/eigenvalues
dsyevd("Vectors", "Upper", &n, A, &n, w,
work, &lwork, iwork, &liwork, &info);
mxFree(work);
mxFree(iwork);
// check successful computation
if (info < 0)
mexErrMsgIdAndTxt("mex:error", "Illegal values in arguments.");
else if (info > 0)
mexWarnMsgIdAndTxt("mex:warn", "Algorithm failed to converge.");
}
First we compile the MEX-file (I'm using VS2013 compiler on Windows):
>> mex -largeArrayDims eig_lapack.cpp libmwlapack.lib
Now I compare against the built-in eig
function :
>> A = [1 2 3 4 5; 2 2 3 4 5; 3 3 3 4 5; 4 4 4 4 5; 5 5 5 5 5]
A =
1 2 3 4 5
2 2 3 4 5
3 3 3 4 5
4 4 4 4 5
5 5 5 5 5
>> [V,D] = eig(A)
V =
0.6420 -0.5234 0.3778 -0.1982 0.3631
0.4404 0.1746 -0.6017 0.5176 0.3816
0.1006 0.6398 -0.0213 -0.6356 0.4196
-0.2708 0.2518 0.6143 0.5063 0.4791
-0.5572 -0.4720 -0.3427 -0.1801 0.5629
D =
-3.1851 0 0 0 0
0 -0.7499 0 0 0
0 0 -0.3857 0 0
0 0 0 -0.2769 0
0 0 0 0 19.5976
>> [VV,DD] = eig_lapack(A)
VV =
0.6420 -0.5234 0.3778 -0.1982 0.3631
0.4404 0.1746 -0.6017 0.5176 0.3816
0.1006 0.6398 -0.0213 -0.6356 0.4196
-0.2708 0.2518 0.6143 0.5063 0.4791
-0.5572 -0.4720 -0.3427 -0.1801 0.5629
DD =
-3.1851 -0.7499 -0.3857 -0.2769 19.5976
the results are the same (though not guaranteed, seeing that eigenvectors are defined up-to a scale ):
>> V-VV, D-diag(DD)
ans =
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
ans =
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.