简体   繁体   中英

How to copy dynamic matrix to device memory in CUDA?

In my code I have dynamic matrix.

int ** file_data = (int **)malloc(TRANSACTIONS * sizeof(int *));
file_data[0] = (int *)malloc((a_size+1) * sizeof(int));
file_data[1] = (int *)malloc((a_size+1) * sizeof(int));
file_data[2] = (int *)malloc((a_size+1) * sizeof(int));
................................................................

I want to copy it to device global memory only once.

I have used:

__device__ int raw_data[][];
...................................
...................................
...................................
cudaMemcpyToSymbol(raw_data[i], file_data[i], (a_size+1)*sizeof(int));

But this does not work.

How can I do it?

You need to Flatten the data

If you're only working with rectangular matrices in the first place, I'd recommend always storing your matrices like this anyway. Either way, you'll need to get the data into this flattened form before trying to push it to device memory.

// A simple row-major, dynamically sized 2-D matrix stored in one
// contiguous buffer — suitable for a single cudaMemcpy to the device.
template<typename T>
class Matrix {
    std::vector<T> _data;   // rows * columns elements, row-major
    size_t rows, columns;
public:
    // Allocates rows*columns value-initialized elements in one block.
    Matrix(size_t rows, size_t columns) :rows(rows), columns(columns) {
        _data.resize(rows * columns);
    }

    // Mutable element access on an lvalue matrix.
    // Throws std::out_of_range if the index is out of bounds (via .at).
    T & operator()(size_t row, size_t column) & {
        return _data.at(row * columns + column); //Row-Major Ordering
    }

    // Read-only element access on a const lvalue matrix.
    T const& operator()(size_t row, size_t column) const& {
        return _data.at(row * columns + column);
    }

    // Element access on an rvalue matrix: return by value, since the
    // matrix (and its storage) is about to be destroyed.
    // NOTE: this overload must be ref-qualified (&&); C++ forbids mixing
    // ref-qualified and non-ref-qualified overloads of the same function,
    // and the original also had a missing '(' in the parameter list.
    T operator()(size_t row, size_t column) && {
        return _data.at(row * columns + column);
    }

    // Pointer to the contiguous storage — pass this to cudaMemcpy and friends.
    T * data() & {
        return _data.data();
    }

    T const* data() const& {
        return _data.data();
    }

    // (rows, columns)
    std::pair<size_t, size_t> size() const {
        return {rows, columns};
    }

    // Total element count.
    size_t flat_size() const {
        return rows * columns;
    }

    // Total size in bytes — the count to hand to cudaMemcpy.
    size_t byte_size() const {
        return flat_size() * sizeof(T);
    }
};


int ** file_data = (int **)malloc(TRANSACTIONS * sizeof(int *));
file_data[0] = (int *)malloc((a_size+1) * sizeof(int));
file_data[1] = (int *)malloc((a_size+1) * sizeof(int));
file_data[2] = (int *)malloc((a_size+1) * sizeof(int));
//................................................................

// Copy the ragged row allocations into one contiguous row-major buffer.
Matrix<int> flat_data(TRANSACTIONS, a_size + 1);
for(size_t row = 0; row < TRANSACTIONS; row++) {
    for(size_t column = 0; column < a_size + 1; column++) {
        flat_data(row, column) = file_data[row][column];
    }
}
//ALTERNATIVE: use this instead of your manual mallocs in the first place!

// One transfer for the whole matrix. Because a_size is only known at
// runtime, a fixed-size __device__ symbol cannot hold it — allocate the
// device buffer dynamically and use plain cudaMemcpy.
// (If you did have a statically sized __device__ array `raw_data`, the
// argument order is cudaMemcpyToSymbol(symbol, src, count) — destination
// symbol FIRST, host source second.)
int * device_data = nullptr;
cudaMalloc(&device_data, flat_data.byte_size());
cudaMemcpy(device_data, flat_data.data(), flat_data.byte_size(),
           cudaMemcpyHostToDevice);

This has the major advantage that you don't have to copy each row into its own device buffer: all rows sit together in one contiguous allocation, which saves memory and reduces the number of API calls you need to make. And a class designed specifically to handle this functionality won't break when you inevitably make a mistake trying to manually manage all the pointers in your original code.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM