簡體   English   中英

Cuda 如何將 char** 從內核復制到主機

[英]Cuda how to copy char** from kernel to host

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <fstream>
#include <algorithm>
#include <time.h>

using namespace std;

/*
 * Copies a table of string pointers: merge_char[i] = desc[i] for every
 * i in [0, 10000).
 *
 * Only the pointer values are copied; the characters they point at are never
 * read or written here.
 *
 * desc        table of at least 10000 char* entries, readable from the device
 * merge_char  table of at least 10000 char* entries, writable from the device
 *
 * Launch layout: any 1D grid works. Each thread starts at its global x index
 * and advances by the total number of threads in x, so the whole table is
 * covered even when the grid has fewer than 10000 threads (for example a
 * single block of 1000 threads). The y dimension is not used.
 */
__global__ void kern_2D(char **desc, char** merge_char) {
    const int entry_count = 10000;
    const int step = gridDim.x * blockDim.x;

    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < entry_count; i += step)
    {
        merge_char[i] = desc[i];
    }
}


/*
 * Program as posted in the question: reads lines from "desc.txt", copies each
 * line into its own device buffer, stores the device pointers in the device
 * table d_desc, runs kern_2D to copy that table into merge_char, and then
 * tries to print the strings on the host.
 *
 * The code is kept exactly as posted; the NOTE(review) comments mark the
 * places that look wrong and should be confirmed before reuse.
 */
int main() {
    cudaError_t err = cudaSuccess;
    // fgets below is given this as its size argument, so at most 254 characters are read per call.
    size_t max_line_len = 255;
    char line[255];
    size_t line_len;
    // Capacity of every pointer table allocated below.
    size_t max_lines_desc = 10000;
    //---------------------------------------------------------------------------------//

    // Device-side table of char* entries, one slot per line.
    char **d_desc;
    cudaMalloc(&d_desc, max_lines_desc * sizeof(char *));

    // Host-side table; every entry is later set to the address of `line`.
    char **m_desc = NULL;
    m_desc = (char**)malloc(max_lines_desc * sizeof(char**));
    // Host-side table that holds the device pointer returned by cudaMalloc for each line.
    char **d_temp_desc = NULL;
    d_temp_desc = (char **)malloc(max_lines_desc * sizeof(char **));

    FILE *f_desc = fopen("desc.txt", "r");
    if (!f_desc)
    {
        // NOTE(review): execution continues after this message, so the fgets
        // call below would receive a NULL stream.
        fprintf(stderr, "Error opening file!\n");
    }
    // Number of lines read so far; also the next free slot in the tables.
    int idesc = 0;

    do
    {
        if (!fgets(line, max_line_len, f_desc))
        {
            if (ferror(f_desc) && !feof(f_desc))
            {
                fprintf(stderr, "Error reading from file!\n");
                // NOTE(review): the stream is closed again after the loop,
                // so this path closes it twice.
                fclose(f_desc);
            }
            break;
        }

        // Strip a trailing newline, if present.
        line_len = strlen(line);
        if ((line_len > 0) && (line[line_len - 1] == '\n'))
        {
            line[line_len - 1] = '\0';
            --line_len;
        }
        // NOTE(review): all m_desc entries point at the same `line` buffer,
        // which is overwritten on the next iteration.
        m_desc[idesc] = line;
        // Allocate a 255-byte device buffer for this line and copy the whole host buffer into it.
        cudaMalloc(&(d_temp_desc[idesc]), sizeof(line) * sizeof(char));
        cudaMemcpy(d_temp_desc[idesc], m_desc[idesc], sizeof(line) * sizeof(char), cudaMemcpyHostToDevice);
        // Store the device pointer for this line in slot idesc of the device table.
        cudaMemcpy(d_desc + idesc, &(d_temp_desc[idesc]), sizeof(char *), cudaMemcpyHostToDevice);

        ++idesc;
    } while (idesc < max_lines_desc);
    fclose(f_desc);

    //---------------------------------------------------------------------------------//


    // Output table for the kernel, allocated with cudaMallocManaged.
    char **merge_char;
    cudaMallocManaged(&merge_char, max_lines_desc * sizeof(char *));


    // NOTE(review): one block of 1000 threads is launched while the tables
    // hold 10000 slots, so only slots 0..999 appear to be written by the kernel.
    kern_2D << < 1, 1000 >> > (d_desc , merge_char);

    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        // NOTE(review): the message mentions addKernel, but the kernel launched above is kern_2D.
        fprintf(stderr, "cudaDeviceSynchronize returned error code %s after launching addKernel!\n", cudaGetErrorString(err));
    }


    //---------------------------------------------------------------------------------//

    char** h_dev;

    // NOTE(review): h_dev is obtained from cudaMalloc, i.e. it is device
    // memory, yet it is used below as the destination of a DeviceToHost copy
    // and is indexed on the host.
    cudaMalloc((void**)(&h_dev), max_lines_desc * sizeof(char*));
    err = cudaMemcpy(h_dev, merge_char, max_lines_desc * sizeof(char*), cudaMemcpyDeviceToHost);
    if (err == cudaSuccess) printf("2: Okay \n");


    // NOTE(review): the table entries are the per-line device pointers that
    // were stored in d_desc, so printf would be reading device addresses from
    // host code; the loop also walks all 10000 slots regardless of how many
    // lines were actually read (idesc).
    for (int i = 0; i < max_lines_desc; i++)
    {
        printf("%s\n", h_dev[i]);
    }


    return 0;


}
//nvcc -arch=sm_30 -o kernel kernel.cu
// cuda-memcheck ./kernel

我很抱歉我的錯誤。 我已經更新了我的代碼。 它完成了。

desc.txt 文件共有 10000 行,每行內容如下所示。從設備復制到主機之后,我檢查了返回狀態,但結果不正確:我無法打印 char** h_dev 的內容。

motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125

我不得不說我真的不明白你在這里的意圖,因為你的內核所做的唯一一件事就是交換指針。 如果這就是您打算做的所有事情,那么您肯定會因為到處使用雙指針而給自己帶來麻煩。 僅管理索引會簡單得多。

但是為了解決您的問題,據我所知,您的“復制回主機”確實不正確。 您實際上是在對數據進行從主機到設備的深度復制,因此在另一個方向上也需要進行深度復制(兩階段復制)。

為了實現這一點,我們不應對“復制到主機”的目標緩沖區使用 cudaMalloc:cudaMalloc 分配的是設備內存。 如果要將某些內容復制到主機,則復制目標必須是主機內存。 因此,我們需要一組 cudaMemcpy 操作,以主機緩沖區作為目標,將數據深度復制回主機。

下面的代碼代表了我可以對你所展示的內容進行的最簡單的修改,它似乎適用於我的簡單測試用例:

$ cat desc.txt
1motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
2motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
3motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
4motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
5motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
6motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap1
$ cat t301.cu
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <fstream>
#include <algorithm>
#include <time.h>

using namespace std;

__global__ void kern_2D(char **desc, char** merge_char) {
    // One thread per table entry along x: thread `entry` stores desc[entry] into merge_char[entry].
    const int entry = blockIdx.x * blockDim.x + threadIdx.x;
    int idy = threadIdx.y + blockDim.y*blockIdx.y;  // never read; the compiler warns about this variable

    // Threads whose x index lies outside the 10000-entry bound have nothing to do.
    if (entry >= 10000)
    {
        return;
    }

    // Only the pointer value is copied; the characters it refers to are not touched.
    char *src = desc[entry];
    merge_char[entry] = src;
    //printf("From key = %s\n", merge_char[entry]);
}


/* Reports a failed CUDA runtime call on stderr; returns true when err is cudaSuccess. */
static bool cudaCallOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

/*
 * Round-trips the lines of "desc.txt" through the GPU as a char** table.
 *
 * Host -> device (deep copy): each line read from the file gets its own
 * 255-byte device buffer; the device addresses are collected in a host table
 * and copied to the device table d_desc in one transfer.
 * Kernel: kern_2D copies the pointer table d_desc into merge_char.
 * Device -> host (deep copy, two stages): first the pointer table is copied
 * into a host buffer, then each string is fetched through its device pointer
 * into a host buffer and printed.
 *
 * At most 10000 lines are processed; a line longer than 254 characters is
 * split by fgets into several entries.
 *
 * Returns 0 on success and 1 if the file cannot be opened, host memory runs
 * out, or a CUDA call fails. A read error in the middle of the file is
 * reported and the lines read so far are still processed.
 */
int main() {
    /* All locals are declared up front so that `goto cleanup` never jumps
       over an initialisation. */
    const size_t max_lines_desc = 10000;   /* table capacity; kern_2D uses the same bound */
    const size_t table_bytes = max_lines_desc * sizeof(char *);
    const int threads_per_block = 256;
    const int block_count = (int)((max_lines_desc + threads_per_block - 1) / threads_per_block);

    char line[255] = { 0 };
    size_t line_len = 0;
    size_t idesc = 0;                      /* number of lines read and uploaded */
    int exit_code = 1;
    cudaError_t err = cudaSuccess;

    FILE *f_desc = NULL;
    char *d_row = NULL;
    char **d_desc = NULL;                  /* device table of device string pointers */
    char **d_temp_desc = NULL;             /* host table of device string pointers */
    char **merge_char = NULL;              /* managed table written by the kernel */
    char **h_dev = NULL;                   /* host copy of merge_char */

    f_desc = fopen("desc.txt", "r");
    if (!f_desc)
    {
        fprintf(stderr, "Error opening file!\n");
        goto cleanup;
    }

    /* calloc leaves unused slots as NULL, so the device table never holds
       uninitialised pointers. */
    d_temp_desc = (char **)calloc(max_lines_desc, sizeof(char *));
    h_dev = (char **)calloc(max_lines_desc, sizeof(char *));
    if (!d_temp_desc || !h_dev)
    {
        fprintf(stderr, "Out of host memory!\n");
        goto cleanup;
    }

    if (!cudaCallOk(cudaMalloc(&d_desc, table_bytes), "cudaMalloc(d_desc)"))
    {
        goto cleanup;
    }
    if (!cudaCallOk(cudaMallocManaged(&merge_char, table_bytes), "cudaMallocManaged(merge_char)"))
    {
        goto cleanup;
    }

    //---------------------------------------------------------------------------------//

    while (idesc < max_lines_desc && fgets(line, (int)sizeof(line), f_desc))
    {
        /* Strip the trailing newline, if any. */
        line_len = strlen(line);
        if ((line_len > 0) && (line[line_len - 1] == '\n'))
        {
            line[line_len - 1] = '\0';
        }

        /* Every row gets a full sizeof(line) buffer so that the copy back to
           the host can always transfer sizeof(line) bytes. */
        d_row = NULL;
        if (!cudaCallOk(cudaMalloc(&d_row, sizeof(line)), "cudaMalloc(line buffer)"))
        {
            goto cleanup;
        }
        /* Record the row before copying so cleanup frees it even if the copy fails. */
        d_temp_desc[idesc] = d_row;
        ++idesc;

        if (!cudaCallOk(cudaMemcpy(d_row, line, sizeof(line), cudaMemcpyHostToDevice),
                        "cudaMemcpy(line -> device)"))
        {
            goto cleanup;
        }
    }
    if (ferror(f_desc))
    {
        /* Best effort: keep going with the lines that were read. */
        fprintf(stderr, "Error reading from file!\n");
    }
    fclose(f_desc);
    f_desc = NULL;

    /* One transfer for the whole pointer table instead of one per line. */
    if (!cudaCallOk(cudaMemcpy(d_desc, d_temp_desc, table_bytes, cudaMemcpyHostToDevice),
                    "cudaMemcpy(pointer table -> device)"))
    {
        goto cleanup;
    }

    //---------------------------------------------------------------------------------//

    /* Enough threads for every one of the max_lines_desc slots. */
    kern_2D<<<block_count, threads_per_block>>>(d_desc, merge_char);
    if (!cudaCallOk(cudaGetLastError(), "kern_2D launch"))
    {
        goto cleanup;
    }

    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %s after launching kern_2D!\n", cudaGetErrorString(err));
        goto cleanup;
    }

    //---------------------------------------------------------------------------------//

    /* Stage 1: bring the pointer table back. Its entries are still device
       addresses and must not be dereferenced on the host. */
    err = cudaMemcpy(h_dev, merge_char, table_bytes, cudaMemcpyDeviceToHost);
    if (!cudaCallOk(err, "cudaMemcpy(merge_char -> host)"))
    {
        goto cleanup;
    }
    printf("2: Okay \n");

    /* Stage 2: fetch each string through its device pointer and print it. */
    for (size_t i = 0; i < idesc; i++)
    {
        if (!h_dev[i])
        {
            continue;
        }
        if (!cudaCallOk(cudaMemcpy(line, h_dev[i], sizeof(line), cudaMemcpyDeviceToHost),
                        "cudaMemcpy(line -> host)"))
        {
            goto cleanup;
        }
        printf("%s\n", line);
    }

    exit_code = 0;

cleanup:
    if (f_desc)
    {
        fclose(f_desc);
    }
    if (d_temp_desc)
    {
        for (size_t i = 0; i < idesc; i++)
        {
            cudaFree(d_temp_desc[i]);
        }
    }
    cudaFree(d_desc);
    cudaFree(merge_char);
    free(d_temp_desc);
    free(h_dev);

    return exit_code;
}
$ nvcc -o t301 t301.cu
t301.cu(15): warning: variable "idy" was declared but never referenced

$ cuda-memcheck ./t301
========= CUDA-MEMCHECK
2: Okay
1motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
2motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
3motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
4motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
5motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
6motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap1
========= ERROR SUMMARY: 0 errors
$

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM