Send and receive dynamic arrays in MPI

I have a big problem with sending and/or receiving dynamic arrays with MPI. Below is just a part of my code, but it should be enough to see what I am doing wrong. Please help me; I have spent all night searching for a solution. I get something like this: *** glibc detected *** ./mv2.out: munmap_chunk(): invalid pointer: 0x0000000000da2a70 *** glibc detected *** ./mv2.out: malloc(): memory corruption (fast): 0x0000000000da2a50 ***

When I replace the dynamic arrays with static ones, everything works perfectly.

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>

#define MASTER 0
#define FROM_MASTER 1
#define FROM_WORKER 2

/*
 * Allocate a rows x cols matrix of doubles.
 *
 * The elements live in ONE contiguous block; the returned array of row
 * pointers points into that block, so a[i][j] indexing works and &a[r][0]
 * is followed by the elements of rows r+1, r+2, ... in memory.
 *
 * Returns the row-pointer array, or NULL when rows or cols is not positive,
 * when the total size would overflow size_t, or when an allocation fails.
 * Release with: free(a[0]); free(a);
 */
double **alloc_2d_array(int rows, int cols) {
    int i;
    double *data;
    double **array;

    if (rows <= 0 || cols <= 0)
        return NULL;
    /* Reject sizes whose byte count does not fit in size_t. */
    if ((size_t)cols > ((size_t)-1) / sizeof(double) / (size_t)rows)
        return NULL;

    data = malloc((size_t)rows * (size_t)cols * sizeof *data);
    array = malloc((size_t)rows * sizeof *array);
    if (data == NULL || array == NULL) {
        /* free(NULL) is a no-op, so both can be released unconditionally. */
        free(data);
        free(array);
        return NULL;
    }

    for (i = 0; i < rows; i++)
        array[i] = data + (size_t)cols * (size_t)i;

    return array;
}

/*
 * Excerpt of the asker's matrix-vector program (braces and several
 * declarations are not part of the excerpt).
 *
 * Visible flow: the master reads `degree`, then a degree x degree matrix `a`
 * and a vector `b` from matrix.txt, sends each worker its block of rows plus
 * `b`, and collects the partial results into `c`. Each worker receives its
 * block, computes c[i] = sum_j a[i][j] * b[j] and sends the result back.
 *
 * NOTE(review): numtasks, taskid, numworkers, averow, extra, offset, mtype,
 * dest, source and rows are used but not declared in the visible lines, and
 * no MPI_Init / MPI_Comm_rank / MPI_Comm_size call is visible -- presumably
 * trimmed from the excerpt.
 */
int main (int argc, char *argv[])
    int degree,
    i, j, k, rc;

    MPI_Status status;

    double **a, *b, *c;

    if (numtasks < 2 )
        printf("Aby rozpoczac obliczenia rownolegle potrzeba co najmniej 2 procesow.\n");
        /* NOTE(review): rc is not assigned anywhere in the visible lines. */
        MPI_Abort(MPI_COMM_WORLD, rc);
    numworkers = numtasks-1; 

    if (taskid == MASTER)
        printf("Podaj stopien macierzy: \n");
        scanf ("%d", &degree);
        printf("Obecnie dostepnych jest %d procesow do dyspozycji mastera.\n", numtasks);

        FILE *file;

        file = fopen("matrix.txt", "r");
        /* NOTE(review): in the visible lines a failed fopen only prints a
           message; `file` is still passed to fscanf below. */
        if(file == NULL)
            printf("Nie mozna otworzyc pliku!\n");

        a = alloc_2d_array(degree, degree);
        b = (double*) malloc(sizeof(double) * degree);
        c = (double*) malloc(sizeof(double) * degree);

        /* Read the degree x degree matrix, then echo it. */
        printf("Tworzenie macierzy z pliku\n");
        for(i = 0; i < degree; i++)
            for(j = 0; j < degree; j++)
                fscanf(file, "%lf", &a[i][j]);

        for(i = 0; i < degree; i++)
            for(j = 0; j < degree; j++)
                printf("%f", a[i][j]);

        /* Read the vector from the same file, then echo it. */
        printf("Tworzenie wektora z pliku\n");
        for(i = 0; i < degree; i++)
            fscanf(file, "%lf", &b[i]);

        for(i = 0; i < degree; i++)
            printf("%f\n", b[i]);


        /* Split the rows over the workers; the first `extra` workers get one
           additional row each. */
        averow = degree / numworkers;
        extra = degree % numworkers;
        offset = 0;
        mtype = FROM_MASTER;
        for (dest = 1; dest <= numworkers; dest++)
            rows = (dest <= extra) ? (averow + 1) : averow;
            printf("Wysylanie %d wierszy do procesu nr %d, z offset'em = %d\n", rows, dest, offset);
            MPI_Send(&degree, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
            MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
            MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
            /* &a[offset][0] is the first element of row `offset`; the rows
               are contiguous because alloc_2d_array uses one data block. */
            MPI_Send(&a[offset][0], rows * degree, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
            /* NOTE(review): b is a `double *`, so &b is the address of the
               pointer variable itself, not of the malloc'ed buffer; the
               buffer argument should be b. */
            MPI_Send(&b, degree, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
            offset = offset + rows;

        /* Collect each worker's slice of the result vector into c. */
        mtype = FROM_WORKER;
        for (i=1; i<=numworkers; i++)
            source = i;
            MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
            MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
            MPI_Recv(&c[offset], rows, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
            printf("Otrzymalem wyniki od procesu nr %d\n", source);

        printf("Wektor wynikowy:\n");
        for (i = 0; i < degree; i++)
            printf("\n%6.2f", c[i]);
        printf ("KONIEC\n");

    if (taskid > MASTER)
        /* NOTE(review): `degree` is only received a few lines below, so on a
           worker these two allocations use a value that has not been set in
           the visible code. */
        b = (double*) malloc(sizeof(double) * degree);
        c = (double*) malloc(sizeof(double) * degree);

        mtype = FROM_MASTER;
        MPI_Recv(&degree, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
        MPI_Recv(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
        MPI_Recv(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
        /* NOTE(review): `a` is never allocated in the worker branch, and &a
           is the address of the `double **` variable itself; receiving
           rows * degree doubles there writes past that variable. */
        MPI_Recv(&a, rows * degree, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status); //HERE IS THE PROBLEM
        /* NOTE(review): same issue as above -- &b is the address of the
           pointer variable, not of the buffer b points to. */
        MPI_Recv(&b, degree, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);

        /* Multiply this worker's block of rows by the vector. */
        for (i = 0; i < rows; i++)
            c[i] = 0.0;
            for (j = 0; j < degree; j++)
                c[i] += a[i][j] * b[j];
        mtype = FROM_WORKER;
        MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
        MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
        /* NOTE(review): c is a `double *`; &c is the address of the pointer
           variable, so the buffer argument should be c. */
        MPI_Send(&c, rows, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);


The shortest version that reproduces the error:

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>

#define MASTER 0
#define FROM_MASTER 1
#define FROM_WORKER 2

/*
 * Allocate a rows x cols matrix of doubles.
 *
 * The elements live in ONE contiguous block; the returned array of row
 * pointers points into that block, so a[i][j] indexing works and &a[r][0]
 * is followed by the elements of rows r+1, r+2, ... in memory.
 *
 * Returns the row-pointer array, or NULL when rows or cols is not positive,
 * when the total size would overflow size_t, or when an allocation fails.
 * Release with: free(a[0]); free(a);
 */
double **alloc_2d_array(int rows, int cols) {
    int i;
    double *data;
    double **array;

    if (rows <= 0 || cols <= 0)
        return NULL;
    /* Reject sizes whose byte count does not fit in size_t. */
    if ((size_t)cols > ((size_t)-1) / sizeof(double) / (size_t)rows)
        return NULL;

    data = malloc((size_t)rows * (size_t)cols * sizeof *data);
    array = malloc((size_t)rows * sizeof *array);
    if (data == NULL || array == NULL) {
        /* free(NULL) is a no-op, so both can be released unconditionally. */
        free(data);
        free(array);
        return NULL;
    }

    for (i = 0; i < rows; i++)
        array[i] = data + (size_t)cols * (size_t)i;

    return array;
}

/*
 * Reduced reproduction posted by the asker (braces and several declarations
 * are not part of the excerpt).
 *
 * Visible flow: the master reads matrix `a` and vector `b` from matrix.txt
 * and sends degree, offset, rows, a block of `a` and `b` to `dest`; a worker
 * allocates its buffers and receives the same five messages.
 *
 * NOTE(review): taskid, dest, rows, offset and mtype are used but not
 * declared in the visible lines, and `degree` is declared but never assigned
 * before its first use on the master -- presumably trimmed from the excerpt.
 */
int main (int argc, char *argv[])
    int degree,
    i, j;

    MPI_Status status;

    double **a, *b, *c;


    if (taskid == MASTER)
        FILE *file;

        file = fopen("matrix.txt", "r");

        a = alloc_2d_array(degree, degree);
        b = (double*) malloc(sizeof(double) * degree);
        c = (double*) malloc(sizeof(double) * degree);

        for(i = 0; i < degree; i++)
            for(j = 0; j < degree; j++)
                fscanf(file, "%lf", &a[i][j]);      

        for(i = 0; i < degree; i++)
            fscanf(file, "%lf", &b[i]);


        offset = 0;
        mtype = FROM_MASTER;
        MPI_Send(&degree, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
        MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
        MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
        /* &a[offset][0] is the first element of row `offset`; the rows are
           contiguous because alloc_2d_array uses one data block. */
        MPI_Send(&a[offset][0], rows * degree, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
        /* NOTE(review): b is a `double *`, so &b is the address of the
           pointer variable itself, not of the malloc'ed buffer; the buffer
           argument should be b. */
        MPI_Send(&b, degree, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);

    if (taskid > MASTER)
        /* NOTE(review): `degree` is only received a few lines below, so on a
           worker these allocations use a value that has not been set in the
           visible code. */
        a = alloc_2d_array(degree, degree);
        b = (double*) malloc(sizeof(double) * degree);
        c = (double*) malloc(sizeof(double) * degree);

        mtype = FROM_MASTER;
        MPI_Recv(&degree, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
        MPI_Recv(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
        MPI_Recv(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
        // v HERE IS THE PROBLEM v
        /* NOTE(review): &a is the address of the `double **` variable, not of
           the element storage; the data block starts at &a[0][0]. Receiving
           rows * degree doubles at &a writes past that variable. */
        MPI_Recv(&a, rows * degree, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);
        /* NOTE(review): same issue -- &b is the address of the pointer
           variable, not of the buffer b points to. */
        MPI_Recv(&b, degree, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);


EDIT: The version without sending from the worker to the master works correctly, but when I add the response, the application hangs. In the link below you can find an example with static arrays. When I replace them with dynamic ones, the application doesn't work properly. https://computing.llnl.gov/tutorials/mpi/samples/C/mpi_mm.c

I'm having to be a bit psychic here, as you haven't posted a complete program that can be run and tested (please do this in future; for instance, here you haven't included all the #includes and #defines required, and you also don't supply an input file). However, from what I can see, your problems in the recv are twofold. Firstly, you haven't allocated any memory for a on the "worker" processes. Secondly, what you specified as the starting address for the recv of a is incorrect. Here is a cut-down version of your code which does what I think you want (but again, I'm guessing a bit). Also please note that C is very much my second language, so please check what I have done. Anyway, here it is:

ian@ian-pc:~/test$ cat mpi.c

#include <stdio.h>
#include <stdlib.h>

#include "mpi.h"

#define FROM_MASTER 1
#define FROM_WORKER 2

#define MASTER 0

/*
 * Allocate a rows x cols matrix of doubles.
 *
 * The elements live in ONE contiguous block; the returned array of row
 * pointers points into that block, so a[i][j] indexing works and &a[r][0]
 * is followed by the elements of rows r+1, r+2, ... in memory.
 *
 * Returns the row-pointer array, or NULL when rows or cols is not positive,
 * when the total size would overflow size_t, or when an allocation fails.
 * Release with: free(a[0]); free(a);
 */
double **alloc_2d_array(int rows, int cols) {
  int i;
  double *data;
  double **array;

  if (rows <= 0 || cols <= 0)
    return NULL;
  /* Reject sizes whose byte count does not fit in size_t. */
  if ((size_t)cols > ((size_t)-1) / sizeof(double) / (size_t)rows)
    return NULL;

  data = malloc((size_t)rows * (size_t)cols * sizeof *data);
  array = malloc((size_t)rows * sizeof *array);
  if (data == NULL || array == NULL) {
    /* free(NULL) is a no-op, so both can be released unconditionally. */
    free(data);
    free(array);
    return NULL;
  }

  for (i = 0; i < rows; i++)
    array[i] = data + (size_t)cols * (size_t)i;

  return array;
}

/*
 * Cut-down version from the answer (braces are not part of the excerpt).
 *
 * Visible flow: the master reads `degree`, fills a degree x degree matrix
 * with a[i][j] = i + 10 * j, prints it, and sends every worker `degree`, its
 * row count and its block of rows. Each worker receives `degree` and `rows`,
 * allocates a rows x degree matrix, receives the block into it and prints it.
 *
 * NOTE(review): numtasks and taskid are read without a visible assignment,
 * and no MPI_Init / MPI_Comm_rank / MPI_Comm_size / MPI_Finalize call is
 * visible -- presumably trimmed from the excerpt.
 */
int main (int argc, char *argv[])

  int degree, numtasks, taskid, numworkers, dest, mtype, rows, offset, averow, extra, i, j;

  MPI_Status status;

  double **a;

  if (numtasks < 2 )
      printf("Aby rozpoczac obliczenia rownolegle potrzeba co najmniej 2 procesow.\n");
  numworkers = numtasks-1; 

  if (taskid == MASTER)
      printf("Podaj stopien macierzy: \n");
      scanf ("%d", &degree);
      printf("Obecnie dostepnych jest %d procesow do dyspozycji mastera.\n", numtasks);

      a = alloc_2d_array(degree, degree);

      /* Fill with a recognisable pattern instead of reading a file. */
      printf("Tworzenie macierzy z pliku\n");
      for(i = 0; i < degree; i++)
    for(j = 0; j < degree; j++)
      a[ i ][ j ] = i + 10 * j;

      printf( "Initial\n" );
      for(i = 0; i < degree; i++) {
    for(j = 0; j < degree; j++) 
      printf("%f ", a[i][j]);

      /* Split the rows over the workers; the first `extra` workers get one
         additional row each. */
      averow = degree / numworkers;
      extra = degree % numworkers;
      offset = 0;
      mtype = FROM_MASTER;
      for (dest = 1; dest <= numworkers; dest++)
      rows = (dest <= extra) ? (averow + 1) : averow;
      MPI_Send(&degree, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
      MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
      /* &(a[offset][0]) is the first element of row `offset`; the following
         rows are adjacent because alloc_2d_array uses one data block. */
      MPI_Send(&(a[offset][0]), rows * degree, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
      offset = offset + rows;


  /* Attempt to order output, not guaranteed to work */
  MPI_Barrier( MPI_COMM_WORLD );

  if (taskid > MASTER)

      mtype = FROM_MASTER;
      MPI_Recv(&degree, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
      MPI_Recv(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
      /* Allocate only after rows and degree have been received. */
      a = alloc_2d_array( rows, degree);
      /* The asker's original call, kept for comparison: it passed &a, the
         address of the pointer variable, instead of the element storage. */
      /* MPI_Recv(&a, rows * degree, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);  */
      MPI_Recv(&(a[0][0]), rows * degree, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status); 

      printf( "Final from %d\n", taskid );
      for(i = 0; i < rows; i++) {
    printf( "%d ", taskid );
    for(j = 0; j < degree; j++)
      printf("%f ", a[i][j]);


  return EXIT_SUCCESS;

ian@ian-pc:~/test$ mpicc -std=c89 -Wall -Wextra -pedantic -O mpi.c
In file included from mpi.c:5:0:
/usr/lib/openmpi/include/mpi.h:220:9: warning: ISO C90 does not support ‘long long’ [-Wlong-long]
mpi.c: In function ‘main’:
mpi.c:45:13: warning: ignoring return value of ‘scanf’, declared with attribute warn_unused_result [-Wunused-result]
ian@ian-pc:~/test$ mpirun -np 3 ./a.out
Podaj stopien macierzy: 
Obecnie dostepnych jest 3 procesow do dyspozycji mastera.
Tworzenie macierzy z pliku
0.000000 10.000000 
1.000000 11.000000 
Final from 1
1 0.000000 10.000000 
Final from 2
2 1.000000 11.000000 
ian@ian-pc:~/test$ mpirun -np 3 ./a.out
Podaj stopien macierzy: 
Final from 1
1 0.000000 10.000000 20.000000 30.000000 
1 1.000000 11.000000 21.000000 31.000000 
Final from 2
2 2.000000 12.000000 22.000000 32.000000 
2 3.000000 13.000000 23.000000 33.000000 
Obecnie dostepnych jest 3 procesow do dyspozycji mastera.
Tworzenie macierzy z pliku
0.000000 10.000000 20.000000 30.000000 
1.000000 11.000000 21.000000 31.000000 
2.000000 12.000000 22.000000 32.000000 
3.000000 13.000000 23.000000 33.000000 
ian@ian-pc:~/test$ mpirun -np 3 ./a.out
Podaj stopien macierzy: 
Final from 2
2 3.000000 13.000000 23.000000 33.000000 43.000000 
2 4.000000 14.000000 24.000000 34.000000 44.000000 
Obecnie dostepnych jest 3 procesow do dyspozycji mastera.
Tworzenie macierzy z pliku
0.000000 10.000000 20.000000 30.000000 40.000000 
1.000000 11.000000 21.000000 31.000000 41.000000 
2.000000 12.000000 22.000000 32.000000 42.000000 
3.000000 13.000000 23.000000 33.000000 43.000000 
4.000000 14.000000 24.000000 34.000000 44.000000 
Final from 1
1 0.000000 10.000000 20.000000 30.000000 40.000000 
1 1.000000 11.000000 21.000000 31.000000 41.000000 
1 2.000000 12.000000 22.000000 32.000000 42.000000 

However also learn about MPI_Bcast, it would be useful here ...

The way you allocate your array dynamically seems strange. Usually you do it like this:

/*
 * Allocate a rows x cols matrix of doubles, one separate allocation per row.
 *
 * Because every row is its own malloc block, the rows are NOT adjacent in
 * memory: &a[0][0] addresses only the cols elements of row 0, not the whole
 * matrix.
 *
 * Returns the row-pointer array, or NULL when rows or cols is not positive,
 * when a row's size would overflow size_t, or when an allocation fails (any
 * rows allocated so far are released first).
 * Release with: free(a[i]) for every row, then free(a).
 */
double **alloc_2d_array(int rows, int cols) {
    int i;
    double **array;

    if (rows <= 0 || cols <= 0)
        return NULL;
    if ((size_t)cols > ((size_t)-1) / sizeof(double))
        return NULL;

    array = malloc((size_t)rows * sizeof *array);
    if (array == NULL)
        return NULL;

    for (i = 0; i < rows; i++) {
        /* One row holds cols elements (the original requested rows*cols). */
        array[i] = malloc((size_t)cols * sizeof *array[i]);
        if (array[i] == NULL) {
            while (i-- > 0)
                free(array[i]);
            free(array);
            return NULL;
        }
    }
    return array;
}

And then deallocation would also happen in two steps:

/*
 * Release the rows of a 2-D array that was allocated one row at a time.
 * arrayPtr points at the caller's `double **` variable.
 *
 * NOTE(review): `i` and `rows` are not declared in the visible lines, so as
 * shown the function needs a loop counter and the row count from somewhere
 * (e.g. an extra parameter) -- confirm against the full source.
 * NOTE(review): only the individual rows are freed in the visible lines; the
 * row-pointer array itself (`array`) is not freed here.
 */
void free_2d_array(double*** arrayPtr){
  double **array = *arrayPtr;
  for (i=0; i<rows; i++) free(array[i]);

UPDATE: Assuming that it's more efficient to do it the way it was originally suggested, the allocation should be like this:

/*
 * Allocate a rows x cols matrix of doubles.
 *
 * The elements live in ONE contiguous block; the returned array of row
 * pointers points into that block, so a[i][j] indexing works and &a[r][0]
 * is followed by the elements of rows r+1, r+2, ... in memory.
 *
 * Returns the row-pointer array, or NULL when rows or cols is not positive,
 * when the total size would overflow size_t, or when an allocation fails.
 * Release with: free(a[0]); free(a);
 */
double **alloc_2d_array(int rows, int cols) {
    int i;
    double *data;
    double **array;

    if (rows <= 0 || cols <= 0)
        return NULL;
    /* Reject sizes whose byte count does not fit in size_t. */
    if ((size_t)cols > ((size_t)-1) / sizeof(double) / (size_t)rows)
        return NULL;

    data = malloc((size_t)rows * (size_t)cols * sizeof *data);
    array = malloc((size_t)rows * sizeof *array);
    if (data == NULL || array == NULL) {
        /* free(NULL) is a no-op, so both can be released unconditionally. */
        free(data);
        free(array);
        return NULL;
    }

    for (i = 0; i < rows; i++)
        array[i] = data + (size_t)cols * (size_t)i;

    return array;
}

In this case, the deallocation is also easy: calling free for both arrays.

