简体   繁体   中英

using magma_dysevd in mex file matlab

I try to write use magma library in matlab, so basically I write a mexfunction which incorporate c code using magma function and then compile this mexfunction into mexa64 file, thus I could use in matlab.

The mexfunction or source c code is below:(called eig_magma)

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <math.h>
#include <cuda_runtime_api.h>
#include <cublas.h>

// includes, project
#include "flops.h"
#include "magma.h"
#include "magma_lapack.h"
#include "testings.h"


#include <math.h>
#include "mex.h"

#define A(i,j)  A[i + j*lda]
extern "C"


void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    #define L_OUT plhs[0]
    #define A_IN  prhs[0]
    #define S_OUT plhs[1]

    magma_init();
    real_Double_t gpu_perf, gpu_time;
    double *h_A, *h_R, *h_work, *L,*S;
    double *w;
    magma_int_t *iwork;
    magma_int_t N, n2, info, lwork, liwork, lda, aux_iwork[1];
    double aux_work[1];

    magma_vec_t jobz = MagmaVec;
    magma_uplo_t uplo = MagmaLower;

    magma_int_t i,j;

    N      = mxGetM(A_IN);  
    lda    = N;
    n2     = lda*N;

     // query for workspace sizes
        magma_dsyevd( jobz, uplo,
                      N, NULL, lda, NULL,
                      aux_work,  -1,
                      aux_iwork, -1,
                      &info );
        lwork  = (magma_int_t) aux_work[0];
        liwork = aux_iwork[0];


    TESTING_MALLOC_CPU( h_A, double, n2);
        h_A    = (double *)mxGetData(A_IN);
        L_OUT = mxCreateDoubleMatrix(N,N,mxREAL);
        L  = mxGetPr(L_OUT);
    S_OUT = mxCreateDoubleMatrix(N,1,mxREAL);
        S  = mxGetPr(S_OUT);

//print and check
        printf("print out h_A\n");
        for(i=0;i<N;i++)
        {
            for(j=0;j<N;j++)
                    printf("%f\t",h_A[i+j*lda]);
            printf("\n");
            }

    /* Allocate host memory for the matrix */
    TESTING_MALLOC_CPU( w,     double, N     );
    TESTING_MALLOC_CPU( iwork,  magma_int_t, liwork );

    TESTING_MALLOC_PIN( h_R,    double, N*lda  );
        TESTING_MALLOC_PIN( h_work, double, lwork  );   

    printf("allocate memory on cpu with pin!\n");
    printf("copy h_A to h_R, and then print h_R\n");
    memcpy( h_R, h_A, sizeof(double)*n2);
//  lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda,h_R, &lda );
//  print and check   
    for(i=0;i<N;i++)
    {   
        for(j=0;j<N;j++)
        {
           printf("%f\t",h_R[i+j*lda]);
        }
    printf("\n");
    }


    /* Performs operation using MAGA */
    gpu_time = magma_wtime();
    printf("start eig\n");
    magma_dsyevd( jobz, uplo,
                          N, h_R, lda, w,
                          h_work, lwork,
                          iwork, liwork,
                          &info );
        gpu_time = magma_wtime() - gpu_time;
    printf("%d\n",info);
        if (info != 0)
        printf("magma_dsyevd returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));

    printf("convey the output\n");
    memcpy(L,h_R,sizeof(double)*n2);    
    memcpy(S,w,sizeof(double)*N);   

    TESTING_FREE_CPU( w );
    TESTING_FREE_CPU( iwork );
    TESTING_FREE_PIN( h_R );
    TESTING_FREE_PIN( h_work);
    TESTING_FINALIZE();

} 

Because using magma need cuda and magma lib I write the makefile to compile the code into mex file:

# Paths where MAGMA, CUDA, and OpenBLAS are installed
MAGMADIR     = /home/yuxin/magma-1.5.0
CUDADIR      = /usr/local/cuda

MATLABHOME   = /opt/share/MATLAB/R2012b.app/

CC            = icpc
LD            = icpc
CFLAGS        = -Wall
LDFLAGS       = -Wall -mkl=parallel

MEX_CFLAGS     = -fPIC -ansi -pthread -DMX_COMPAT_32 \
               -DMATLAB_MEX_FILE

MAGMA_CFLAGS := -DADD_ -DHAVE_CUBLAS -I$(MAGMADIR)/include -I$(CUDADIR)/include -I$(MAGMADIR)/testing

MAGMA_LIBS   := -L$(MAGMADIR)/lib -L$(CUDADIR)/lib64 \
                -lmagma -lcublas -lcudart

MEXLIBS      := -L/opt/share/MATLAB/R2012b.app/bin/glnxa64 -lmx -lmex -lmat -lm -lstdc++

MEX_INCLU    := -I$(MATLABHOME)/extern/include -I$(MATLABHOME)/simulink/include

MEXFLAGS     := -shared

OBJECT = eig_magma.o
EXECUTABLE = eig_magma

$(EXECUTABLE): $(OBJECT)
    $(CC) $(LDFLAGS) $(MEXFLAGS) $(OBJECT) -o $@ $(MAGMA_LIBS) $(MEXLIBS)
eig_magma.o: eig_magma.c
    $(CC) $(CFLAGS) $(MAGMA_CFLAGS) $(MEX_INCLU) $(MEX_CFLAGS) -c $< -o $@


clean:
    rm -rf eig_magma *.o eig_magma.mexa64

It could be compiled successfully, however when I run the eig_magma in matlab, and when matlab execute to

magma_dsyevd( jobz, uplo, N, h_R, lda, w, h_work, lwork, iwork, liwork, &info );

matlab crashed.......I also have tried to only write the eig_magma in c, without using mex function, it was compiled successfully and works fine.

Anyone has idea how to solve this problem?

Thank you

yuxin

Let me give an example. Since I never worked with MAGMA before, I'm showing the same code only using regular LAPACK (code adapted from a previous answer I gave). It shouldn't be difficult to convert it to using MAGMA equivalent functions instead.

Note that MATLAB already ships with the BLAS/LAPACK libraries and the necessary header files for you to use from MEX-files. In fact this is the Intel MKL library, so it's a well-optimized implementation.

eig_lapack.cpp

#include "mex.h"
#include "lapack.h"

/*
// the following prototype is defined in "lapack.h" header
extern void dsyevd(char *jobz, char *uplo,
    ptrdiff_t *n, double *a, ptrdiff_t *lda, double *w,
    double *work, ptrdiff_t *lwork, ptrdiff_t *iwork, ptrdiff_t *liwork,
    ptrdiff_t *info);
*/

void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    // validate input
    if (nrhs != 1 || nlhs > 2)
        mexErrMsgIdAndTxt("mex:error", "Wrong number of arguments.");
    if (!mxIsDouble(prhs[0]) || mxIsComplex(prhs[0]))
        mexErrMsgIdAndTxt("mex:error", "Input is not real double matrix.");
    mwSignedIndex m = mxGetM(prhs[0]), n = mxGetN(prhs[0]);
    if (m != n)
        mexErrMsgIdAndTxt("mex:error", "Input is not square symmetric matrix.");

    // allocate output
    plhs[0] = mxDuplicateArray(prhs[0]);
    plhs[1] = mxCreateDoubleMatrix(1, n, mxREAL);
    double *A = mxGetPr(plhs[0]);
    double *w = mxGetPr(plhs[1]);

    // query and allocate the optimal workspace size
    double workopt = 0;
    mwSignedIndex iworkopt = 0;
    mwSignedIndex lwork = -1, liwork = -1, info = 0;
    dsyevd("Vectors", "Upper", &n, A, &n, w,
        &workopt, &lwork, &iworkopt, &liwork, &info);
    lwork = (mwSignedIndex) workopt;
    liwork = (mwSignedIndex) iworkopt;
    double *work = (double*) mxMalloc(lwork * sizeof(double));
    mwSignedIndex *iwork = (mwSignedIndex*) mxMalloc(liwork * sizeof(mwSignedIndex));

    // compute eigenvectors/eigenvalues
    dsyevd("Vectors", "Upper", &n, A, &n, w,
        work, &lwork, iwork, &liwork, &info);
    mxFree(work);
    mxFree(iwork);

    // check successful computation
    if (info < 0)
        mexErrMsgIdAndTxt("mex:error", "Illegal values in arguments.");
    else if (info > 0)
        mexWarnMsgIdAndTxt("mex:warn", "Algorithm failed to converge.");
}

First we compile the MEX-file (I'm using VS2013 compiler on Windows):

>> mex -largeArrayDims eig_lapack.cpp libmwlapack.lib

Now I compare against the built-in eig function :

>> A = [1 2 3 4 5; 2 2 3 4 5; 3 3 3 4 5; 4 4 4 4 5; 5 5 5 5 5]
A =
     1     2     3     4     5
     2     2     3     4     5
     3     3     3     4     5
     4     4     4     4     5
     5     5     5     5     5

>> [V,D] = eig(A)
V =
    0.6420   -0.5234    0.3778   -0.1982    0.3631
    0.4404    0.1746   -0.6017    0.5176    0.3816
    0.1006    0.6398   -0.0213   -0.6356    0.4196
   -0.2708    0.2518    0.6143    0.5063    0.4791
   -0.5572   -0.4720   -0.3427   -0.1801    0.5629
D =
   -3.1851         0         0         0         0
         0   -0.7499         0         0         0
         0         0   -0.3857         0         0
         0         0         0   -0.2769         0
         0         0         0         0   19.5976

>> [VV,DD] = eig_lapack(A)
VV =
    0.6420   -0.5234    0.3778   -0.1982    0.3631
    0.4404    0.1746   -0.6017    0.5176    0.3816
    0.1006    0.6398   -0.0213   -0.6356    0.4196
   -0.2708    0.2518    0.6143    0.5063    0.4791
   -0.5572   -0.4720   -0.3427   -0.1801    0.5629
DD =
   -3.1851   -0.7499   -0.3857   -0.2769   19.5976

the results are the same (though not guaranteed, seeing that eigenvectors are defined up-to a scale ):

>> V-VV, D-diag(DD)
ans =
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
ans =
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0
     0     0     0     0     0

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM