简体   繁体   English

Strassen矩阵乘法的内存管理

[英]Memory management for Strassen's matrix multiplication

As a part of an assignment, I am trying to find out the crossover point for Strassen's matrix multiplication and naive multiplication algorithms. 作为任务的一部分,我试图找出Strassen矩阵乘法和朴素乘法算法的交叉点。 But for the same, I am unable to proceed when matrix becomes 256x256. 但同样,当矩阵变为256x256时,我无法继续。 Can someone please suggest me the appropriate memory management technique to be able to handle larger inputs. 有人可以建议我适当的内存管理技术,以便能够处理更大的输入。

The code is in C as follows: 代码在C中如下:

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

void strassenMul(double* X, double* Y, double* Z, int m);
void matMul(double* A, double* B, double* C, int n);
void matAdd(double* A, double* B, double* C, int m);
void matSub(double* A, double* B, double* C, int m);

int idx = 0;

int main()
{
    int N;
    int count = 0;
    int i, j;
    clock_t start, end;
    double elapsed;

    int total = 15;
    double tnaive[total];
    double tstrassen[total];
    printf("-------------------------------------------------------------------------\n\n");
    for (count = 0; count < total; count++)
    {
        N = pow(2, count);
        printf("Matrix size = %2d\t",N);
        double X[N][N], Y[N][N], Z[N][N], W[N][N];
        srand(time(NULL));
        for (i = 0; i < N; i++)
        {
            for (j = 0; j < N; j++)
            {
                X[i][j] = rand()/(RAND_MAX + 1.);
                Y[i][j] = rand()/(RAND_MAX + 1.);
            }
        }
        start = clock();
        matMul((double *)X, (double *)Y, (double *)W, N);
        end = clock();
        elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
        tnaive[count] = elapsed;
        printf("naive = %5.4f\t\t",tnaive[count]);

        start = clock();
        strassenMul((double *)X, (double *)Y, (double *)Z, N);
        end = clock();
        elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
        tstrassen[count] = elapsed;
        printf("strassen = %5.4f\n",tstrassen[count]);
    }
    printf("-------------------------------------------------------------------\n\n\n");

    while (tnaive[idx+1] <= tstrassen[idx+1] && idx < 14) idx++;

    printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);

    printf("Please enter the size of array as a power of 2\n");
    scanf("%d",&N);
    double A[N][N], B[N][N], C[N][N];
    srand(time(NULL));
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            A[i][j] = rand()/(RAND_MAX + 1.);
            B[i][j] = rand()/(RAND_MAX + 1.);
        }
    }

printf("------------------- Input Matrices A and B ---------------------------\n\n");
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",A[i][j]);
        printf("\n");
    }
    printf("\n");

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",B[i][j]);
        printf("\n");
    }
    printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");

    strassenMul((double *)A, (double *)B, (double *)C, N);

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",C[i][j]);
        printf("\n");
    }
    return(0);
}

void strassenMul(double *X, double *Y, double *Z, int m)
{
    if (m <= idx)
    {
        matMul((double *)X, (double *)Y, (double *)Z, m);
        return;
    }
    if (m == 1)
    {
        *Z = *X * *Y;
        return;
    }
    int row = 0, col = 0;
    int n = m/2;
    int i = 0, j = 0;
    double x11[n][n], x12[n][n], x21[n][n], x22[n][n];
    double y11[n][n], y12[n][n], y21[n][n], y22[n][n];
    double P1[n][n], P2[n][n], P3[n][n], P4[n][n], P5[n][n], P6[n][n], P7[n][n];
    double C11[n][n], C12[n][n], C21[n][n], C22[n][n];
    double S1[n][n], S2[n][n], S3[n][n], S4[n][n], S5[n][n], S6[n][n], S7[n][n];
    double S8[n][n], S9[n][n], S10[n][n], S11[n][n], S12[n][n], S13[n][n], S14[n][n];

    for (row = 0, i = 0; row < n; row++, i++)
    {
        for (col = 0, j = 0; col < n; col++, j++)
        {
            x11[i][j] = *((X+row*m)+col);
            y11[i][j] = *((Y+row*m)+col);
        }
        for (col = n, j = 0; col < m; col++, j++)
        {
            x12[i][j] = *((X+row*m)+col);
            y12[i][j] = *((Y+row*m)+col);
        }
    }

    for (row = n, i = 0; row < m; row++, i++)
    {
        for (col = 0, j = 0; col < n; col++, j++)
        {
            x21[i][j] = *((X+row*m)+col);
            y21[i][j] = *((Y+row*m)+col);
        }
        for (col = n, j = 0; col < m; col++, j++)
        {
            x22[i][j] = *((X+row*m)+col);
            y22[i][j] = *((Y+row*m)+col);
        }
    }

    // Calculating P1
    matAdd((double *)x11, (double *)x22, (double *)S1, n);
    matAdd((double *)y11, (double *)y22, (double *)S2, n);
    strassenMul((double *)S1, (double *)S2, (double *)P1, n);

    // Calculating P2
    matAdd((double *)x21, (double *)x22, (double *)S3, n);
    strassenMul((double *)S3, (double *)y11, (double *)P2, n);

    // Calculating P3
    matSub((double *)y12, (double *)y22, (double *)S4, n);
    strassenMul((double *)x11, (double *)S4, (double *)P3, n);

    // Calculating P4
    matSub((double *)y21, (double *)y11, (double *)S5, n);
    strassenMul((double *)x22, (double *)S5, (double *)P4, n);

    // Calculating P5
    matAdd((double *)x11, (double *)x12, (double *)S6, n);
    strassenMul((double *)S6, (double *)y22, (double *)P5, n);

    // Calculating P6
    matSub((double *)x21, (double *)x11, (double *)S7, n);
    matAdd((double *)y11, (double *)y12, (double *)S8, n);
    strassenMul((double *)S7, (double *)S8, (double *)P6, n);

    // Calculating P7
    matSub((double *)x12, (double *)x22, (double *)S9, n);
    matAdd((double *)y21, (double *)y22, (double *)S10, n);
    strassenMul((double *)S9, (double *)S10, (double *)P7, n);

    // Calculating C11
    matAdd((double *)P1, (double *)P4, (double *)S11, n);
    matSub((double *)S11, (double *)P5, (double *)S12, n);
    matAdd((double *)S12, (double *)P7, (double *)C11, n);

    // Calculating C12
    matAdd((double *)P3, (double *)P5, (double *)C12, n);

    // Calculating C21
    matAdd((double *)P2, (double *)P4, (double *)C21, n);

    // Calculating C22
    matAdd((double *)P1, (double *)P3, (double *)S13, n);
    matSub((double *)S13, (double *)P2, (double *)S14, n);
    matAdd((double *)S14, (double *)P6, (double *)C22, n);

    for (row = 0, i = 0; row < n; row++, i++)
    {
        for (col = 0, j = 0; col < n; col++, j++)
            *((Z+row*m)+col) = C11[i][j];
        for (col = n, j = 0; col < m; col++, j++)
            *((Z+row*m)+col) = C12[i][j];
    }
    for (row = n, i = 0; row < m; row++, i++)
    {
        for (col = 0, j = 0; col < n; col++, j++)
            *((Z+row*m)+col) = C21[i][j];
        for (col = n, j = 0; col < m; col++, j++)
            *((Z+row*m)+col) = C22[i][j];
    }
}

void matMul(double *A, double *B, double *C, int n)
{
    int i = 0, j = 0, k = 0, row = 0, col = 0;
    double sum;
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            sum = 0.0;
            for (k = 0; k < n; k++)
            {
                sum += *((A+i*n)+k) * *((B+k*n)+j);
            }
            *((C+i*n)+j) = sum;
        }
    }
}

void matAdd(double *A, double *B, double *C, int m)
{
    int row = 0, col = 0;
    for (row = 0; row < m; row++)
        for (col = 0; col < m; col++)
            *((C+row*m)+col) = *((A+row*m)+col) + *((B+row*m)+col);
}

void matSub(double *A, double *B, double *C, int m)
{
    int row = 0, col = 0;
    for (row = 0; row < m; row++)
        for (col = 0; col < m; col++)
            *((C+row*m)+col) = *((A+row*m)+col) - *((B+row*m)+col);
}

Added later If I try using malloc statements for memory assignment, the code is as follows. 稍后添加如果我尝试使用malloc语句进行内存分配,则代码如下。 But the problem is that it stops after the naive matrix multiplication method and does not even proceed to the Strassen's method for N=1. 但问题是它在朴素矩阵乘法后停止,甚至没有进入Strassen方法N = 1。 It shows a prompt to close the program. 它显示关闭程序的提示。

for (count = 0; count < total; count++)
{
    N = pow(2, count);
    printf("Matrix size = %2d\t",N);
    //double X[N][N], Y[N][N], Z[N][N], W[N][N];
    double **X, **Y, **Z, **W;
    X = malloc(N * sizeof(double*));
    if (X == NULL){
        perror("Failed malloc() in X");
        return 1;
    }
    Y = malloc(N * sizeof(double*));
            if (Y == NULL){
                perror("Failed malloc() in Y");
                return 1;
    }
    Z = malloc(N * sizeof(double*));
            if (Z == NULL){
                perror("Failed malloc() in Z");
                return 1;
    }
    W = malloc(N * sizeof(double*));
            if (W == NULL){
                perror("Failed malloc() in W");
                return 1;
    }
    for (j = 0; j < N; j++)
    {
        X[j] = malloc(N * sizeof(double*));
        if (X[j] == NULL){
            perror("Failed malloc() in X[j]");
            return 1;
        }
        Y[j] = malloc(N * sizeof(double*));
                    if (Y[j] == NULL){
                        perror("Failed malloc() in Y[j]");
                        return 1;
        }
        Z[j] = malloc(N * sizeof(double*));
                    if (Z[j] == NULL){
                        perror("Failed malloc() in Z[j]");
                        return 1;
        }
        W[j] = malloc(N * sizeof(double*));
                    if (W[j] == NULL){
                        perror("Failed malloc() in W[j]");
                        return 1;
        }
    }
    srand(time(NULL));
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            X[i][j] = rand()/(RAND_MAX + 1.);
            Y[i][j] = rand()/(RAND_MAX + 1.);
        }
    }
    start = clock();
    matMul((double *)X, (double *)Y, (double *)W, N);
    end = clock();
    elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
    tnaive[count] = elapsed;
    printf("naive = %5.4f\t\t",tnaive[count]);

    start = clock();
    strassenMul((double *)X, (double *)Y, (double *)Z, N);
    end = clock();
    elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
    tstrassen[count] = elapsed;
    for (j = 0; j < N; j++)
    {
        free(X[j]);
        free(Y[j]);
        free(Z[j]);
        free(W[j]);
    }

    free(X); free(Y); free(Z); free(W);

    printf("strassen = %5.4f\n",tstrassen[count]);
}

I have re-written the answer. 我重写了答案。 My previous answer which allocated memory row by row won't work, because OP has cast the 2-D arrays to 1-D arrays when passed to the functions. 我以前逐行分配内存的答案将无效,因为OP在传递给函数时已将2-D数组转换为1-D数组。 Here is my re-write of the code with some simplifications, such as keeping all the matrix arrays 1-dimensional. 这是我对代码的重写,并进行了一些简化,例如将所有矩阵数组保持为1维。

I am unsure exactly what Strassen's method does, although the recursion halves the matrix dimensions. 我不确定Strassen的方法到底是什么,尽管递归将矩阵维度减半。 So I do wonder if the intention was to use row*2 and col*2 when accessing the arrays passed. 所以我想知道在访问传递的数组时是否打算使用row*2col*2

I hope the techniques are useful to you - even that it works! 我希望这些技术对你有用 - 即使它有效! All the matrix arrays are now on the heap. 所有矩阵数组现在都在堆上。

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

#define total   4       //15

void strassenMul(double* X, double* Y, double* Z, int m);
void matMul(double* A, double* B, double* C, int n);
void matAdd(double* A, double* B, double* C, int m);
void matSub(double* A, double* B, double* C, int m);

enum array { x11, x12, x21, x22, y11, y12, y21, y22,
    P1, P2, P3, P4, P5, P6, P7, C11, C12, C21, C22,
    S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, arrs };

int idx = 0;

int main()
{
    int N;
    int count = 0;
    int i, j;
    clock_t start, end;
    double elapsed;

    double tnaive[total];
    double tstrassen[total];
    double *X, *Y, *Z, *W, *A, *B, *C;

    printf("-------------------------------------------------------------------------\n\n");
    for (count = 0; count < total; count++)
    {
        N = (int)pow(2, count);
        printf("Matrix size = %2d\t",N);
        X = malloc(N*N*sizeof(double));
        Y = malloc(N*N*sizeof(double));
        Z = malloc(N*N*sizeof(double));
        W = malloc(N*N*sizeof(double));
        if (X==NULL || Y==NULL || Z==NULL || W==NULL) {
            printf("Out of memory (1)\n");
            return 1;
        }
        srand((unsigned)time(NULL));
        for (i=0; i<N*N; i++)
        {
            X[i] = rand()/(RAND_MAX + 1.);
            Y[i] = rand()/(RAND_MAX + 1.);
        }
        start = clock();
        matMul(X, Y, W, N);
        end = clock();
        elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
        tnaive[count] = elapsed;
        printf("naive = %5.4f\t\t",tnaive[count]);

        start = clock();
        strassenMul(X, Y, Z, N);
        free(W); 
        free(Z); 
        free(Y); 
        free(X); 
        end = clock();
        elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
        tstrassen[count] = elapsed;
        printf("strassen = %5.4f\n",tstrassen[count]);
    }
    printf("-------------------------------------------------------------------\n\n\n");

    while (tnaive[idx+1] <= tstrassen[idx+1] && idx < 14) idx++;

    printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);

    printf("Please enter the size of array as a power of 2\n");
    scanf("%d",&N);
    A = malloc(N*N*sizeof(double));
    B = malloc(N*N*sizeof(double));
    C = malloc(N*N*sizeof(double));
    if (A==NULL || B==NULL || C==NULL) {
        printf("Out of memory (2)\n");
        return 1;
    }
    srand((unsigned)time(NULL));
    for (i=0; i<N*N; i++)
    {
        A[i] = rand()/(RAND_MAX + 1.);
        B[i] = rand()/(RAND_MAX + 1.);
    }

    printf("------------------- Input Matrices A and B ---------------------------\n\n");
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",A[i*N+j]);
        printf("\n");
    }
    printf("\n");

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",B[i*N+j]);
        printf("\n");
    }
    printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");

    strassenMul(A, B, C, N);

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",C[i*N+j]);
        printf("\n");
    }
    free(C); 
    free(B); 
    free(A); 
    return(0);
}

void strassenMul(double *X, double *Y, double *Z, int m)
{
    int row = 0, col = 0;
    int n = m/2;
    int i = 0, j = 0;
    double *arr[arrs];                      // each matrix mem ptr

    if (m <= idx)
    {
        matMul(X, Y, Z, m);
        return;
    }
    if (m == 1)
    {
        *Z = *X * *Y;
        return;
    }

    for (i=0; i<arrs; i++) {                // memory for arrays
        arr[i] = malloc(n*n*sizeof(double));
        if (arr[i] == NULL) {
            printf("Out of memory (1)\n");
            exit (1);                       // brutal
        }
    }

    for (row = 0, i = 0; row < n; row++, i++)
    {
        for (col = 0, j = 0; col < n; col++, j++)
        {
            arr[x11][i*n+j] = X[row*m+col];
            arr[y11][i*n+j] = Y[row*m+col];
        }
        for (col = n, j = 0; col < m; col++, j++)
        {
            arr[x12][i*n+j] = X[row*m+col];
            arr[y12][i*n+j] = Y[row*m+col];
        }
    }

    for (row = n, i = 0; row < m; row++, i++)
    {
        for (col = 0, j = 0; col < n; col++, j++)
        {
            arr[x21][i*n+j] = X[row*m+col];
            arr[y21][i*n+j] = Y[row*m+col];
        }
        for (col = n, j = 0; col < m; col++, j++)
        {
            arr[x22][i*n+j] = X[row*m+col];
            arr[y22][i*n+j] = Y[row*m+col];
        }
    }

    // Calculating P1
    matAdd(arr[x11], arr[x22], arr[S1], n);
    matAdd(arr[y11], arr[y22], arr[S2], n);
    strassenMul(arr[S1], arr[S2], arr[P1], n);

    // Calculating P2
    matAdd(arr[x21], arr[x22], arr[S3], n);
    strassenMul(arr[S3], arr[y11], arr[P2], n);

    // Calculating P3
    matSub(arr[y12], arr[y22], arr[S4], n);
    strassenMul(arr[x11], arr[S4], arr[P3], n);

    // Calculating P4
    matSub(arr[y21], arr[y11], arr[S5], n);
    strassenMul(arr[x22], arr[S5], arr[P4], n);

    // Calculating P5
    matAdd(arr[x11], arr[x12], arr[S6], n);
    strassenMul(arr[S6], arr[y22], arr[P5], n);

    // Calculating P6
    matSub(arr[x21], arr[x11], arr[S7], n);
    matAdd(arr[y11], arr[y12], arr[S8], n);
    strassenMul(arr[S7], arr[S8], arr[P6], n);

    // Calculating P7
    matSub(arr[x12], arr[x22], arr[S9], n);
    matAdd(arr[y21], arr[y22], arr[S10], n);
    strassenMul(arr[S9], arr[S10], arr[P7], n);

    // Calculating C11
    matAdd(arr[P1], arr[P4], arr[S11], n);
    matSub(arr[S11], arr[P5], arr[S12], n);
    matAdd(arr[S12], arr[P7], arr[C11], n);

    // Calculating C12
    matAdd(arr[P3], arr[P5], arr[C12], n);

    // Calculating C21
    matAdd(arr[P2], arr[P4], arr[C21], n);

    // Calculating C22
    matAdd(arr[P1], arr[P3], arr[S13], n);
    matSub(arr[S13], arr[P2], arr[S14], n);
    matAdd(arr[S14], arr[P6], arr[C22], n);

    for (row = 0, i = 0; row < n; row++, i++)
    {
        for (col = 0, j = 0; col < n; col++, j++)
            Z[row*m+col] = arr[C11][i*n+j];
        for (col = n, j = 0; col < m; col++, j++)
            Z[row*m+col] = arr[C12][i*n+j];
    }
    for (row = n, i = 0; row < m; row++, i++)
    {
        for (col = 0, j = 0; col < n; col++, j++)
            Z[row*m+col] = arr[C21][i*n+j];
        for (col = n, j = 0; col < m; col++, j++)
            Z[row*m+col] = arr[C22][i*n+j];
    }

    for (i=0; i<arrs; i++)
        free (arr[i]);
}

void matMul(double *A, double *B, double *C, int n)
{
    int i = 0, j = 0, k = 0, row = 0, col = 0;
    double sum;
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            sum = 0.0;
            for (k = 0; k < n; k++)
            {
                sum += A[i*n+k] * B[k*n+j];
            }
            C[i*n+j] = sum;
        }
    }
}

void matAdd(double *A, double *B, double *C, int m)
{
    int row = 0, col = 0;
    for (row = 0; row < m; row++)
        for (col = 0; col < m; col++)
            C[row*m+col] = A[row*m+col] + B[row*m+col];
}

void matSub(double *A, double *B, double *C, int m)
{
    int row = 0, col = 0;
    for (row = 0; row < m; row++)
        for (col = 0; col < m; col++)
            C[row*m+col] = A[row*m+col] - B[row*m+col];
}

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM