簡體   English   中英

CuFFT雙至復數

[英]CuFFT Double to Complex

我想使用CuFFT Lib將FFT從double轉換為std :: complex。 我的代碼看起來像

#include <complex>
#include <iostream>
#include <cufft.h>
#include <cuda_runtime_api.h>

typedef std::complex<double> Complex;
using namespace std;

int main(){
  int n = 100;
  double* in;
  Complex* out;
  in = (double*) malloc(sizeof(double) * n);
  out = (Complex*) malloc(sizeof(Complex) * n/2+1);
  for(int i=0; i<n; i++){
     in[i] = 1;
  }

  cufftHandle plan;
  plan = cufftPlan1d(&plan, n, CUFFT_D2Z, 1);
  unsigned int mem_size = sizeof(double)*n;
  cufftDoubleReal *d_in;
  cufftDoubleComplex *d_out;
  cudaMalloc((void **)&d_in, mem_size);
  cudaMalloc((void **)&d_out, mem_size);
  cudaMemcpy(d_in, in, mem_size, cudaMemcpyHostToDevice);
  cudaMemcpy(d_out, out, mem_size, cudaMemcpyHostToDevice);
  int succes = cufftExecD2Z(plan,(cufftDoubleReal *) d_in,(cufftDoubleComplex *) d_out);
  cout << succes << endl;
  cudaMemcpy(out, d_out, mem_size, cudaMemcpyDeviceToHost);

  for(int i=0; i<n/2; i++){
     cout << "out: " << i << " "  << out[i].real() << " " <<  out[i].imag() << endl;
  }
  return 0;
}

但是在我看來這一定是錯誤的,因為我認為轉換后的值應該是1 0 0 0 0 ....或不進行歸一化100 0 0 0 0 ....但我只會得到0 0 0 0 0。 ..

此外,如果cufftExecD2Z可以在適當的地方工作,我會更希望這樣做,這應該是可能的,但是我還沒有弄清楚如何正確地做到這一點。 有人可以幫忙嗎?

您的代碼有各種錯誤。 您可能應該查看cufft文檔以及示例代碼。

  1. 您應該對所有API返回值進行正確的cuda錯誤檢查和正確的cufft錯誤檢查。
  2. cufftPlan1d函數的返回值不計入計划:

     plan = cufftPlan1d(&plan, n, CUFFT_D2Z, 1); 

    函數本身會設置計划(這就是為什么將&plan傳遞給函數的原因),然后,當您將返回值分配給計划時,它將破壞函數設置的計划。

  3. 您正確地確定輸出的大小可以為((N/2)+1) ,但是在主機端您都沒有為它正確分配空間:

     out = (Complex*) malloc(sizeof(Complex) * n/2+1); 

    或在設備端:

     unsigned int mem_size = sizeof(double)*n; ... cudaMalloc((void **)&d_out, mem_size); 

下面的代碼已修復了上述一些問題,足以得到您想要的結果(100、0、0,...)

#include <complex>
#include <iostream>
#include <cufft.h>
#include <cuda_runtime_api.h>

#define cudaCheckErrors(msg) \
    do { \
        cudaError_t __err = cudaGetLastError(); \
        if (__err != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(__err), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)


typedef std::complex<double> Complex;
using namespace std;

int main(){
  int n = 100;
  double* in;
  Complex* out;
#ifdef IN_PLACE
  in = (double*) malloc(sizeof(Complex) * (n/2+1));
  out = (Complex*)in;
#else
  in = (double*) malloc(sizeof(double) * n);
  out = (Complex*) malloc(sizeof(Complex) * (n/2+1));
#endif
  for(int i=0; i<n; i++){
     in[i] = 1;
  }

  cufftHandle plan;
  cufftResult res = cufftPlan1d(&plan, n, CUFFT_D2Z, 1);
  if (res != CUFFT_SUCCESS)  {cout << "cufft plan error: " << res << endl; return 1;}
  cufftDoubleReal *d_in;
  cufftDoubleComplex *d_out;
  unsigned int out_mem_size = (n/2 + 1)*sizeof(cufftDoubleComplex);
#ifdef IN_PLACE
  unsigned int in_mem_size = out_mem_size;
  cudaMalloc((void **)&d_in, in_mem_size);
  d_out = (cufftDoubleComplex *)d_in;
#else
  unsigned int in_mem_size = sizeof(cufftDoubleReal)*n;
  cudaMalloc((void **)&d_in, in_mem_size);
  cudaMalloc((void **)&d_out, out_mem_size);
#endif
  cudaCheckErrors("cuda malloc fail");
  cudaMemcpy(d_in, in, in_mem_size, cudaMemcpyHostToDevice);
  cudaCheckErrors("cuda memcpy H2D fail");
  res = cufftExecD2Z(plan,d_in, d_out);
  if (res != CUFFT_SUCCESS)  {cout << "cufft exec error: " << res << endl; return 1;}
  cudaMemcpy(out, d_out, out_mem_size, cudaMemcpyDeviceToHost);
  cudaCheckErrors("cuda memcpy D2H fail");

  for(int i=0; i<n/2; i++){
     cout << "out: " << i << " "  << out[i].real() << " " <<  out[i].imag() << endl;
  }
  return 0;
}

查看文檔,以了解在實際情況到復雜情況下進行就地轉換所必需的內容。 可以使用-DIN_PLACE重新編譯以上代碼,以查看就地轉換的行為以及必要的代碼更改。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM