
cuFFT cannot recover after an error

I cannot find a way to resume cuFFT processing after a previous unsuccessful launch.

Here is a minimal example. The main idea is as follows: we create a simple cuFFT processor which manages its resources (device memory and cuFFT plans). We check that this processor does perform an FFT. Then we ask it to create too many plans, thus forcing a cuFFT error. Then we release all resources and try to repeat the successful launch. However, the processor can do nothing after the failure.

Firstly, here is a rather long preamble:

#include <iostream>
using std::cout;
using std::cerr;
using std::endl;

#include <vector>
using std::vector;

#include "cuda_runtime.h"
#include "cufft.h"

// cuFFT API errors
static const char* _cufftGetErrorEnum( cufftResult_t error )
{
    switch ( error )
    {
        case CUFFT_SUCCESS:
        return "CUFFT_SUCCESS";

        case CUFFT_INVALID_PLAN:
        return "cuFFT was passed an invalid plan handle";

        case CUFFT_ALLOC_FAILED:
        return "cuFFT failed to allocate GPU or CPU memory";

        // No longer used
        case CUFFT_INVALID_TYPE:
        return "CUFFT_INVALID_TYPE";

        case CUFFT_INVALID_VALUE:
        return "User specified an invalid pointer or parameter";

        case CUFFT_INTERNAL_ERROR:
        return "Driver or internal cuFFT library error";

        case CUFFT_EXEC_FAILED:
        return "Failed to execute an FFT on the GPU";

        case CUFFT_SETUP_FAILED:
        return "The cuFFT library failed to initialize";

        case CUFFT_INVALID_SIZE:
        return "User specified an invalid transform size";

        // No longer used
        case CUFFT_UNALIGNED_DATA:
        return "CUFFT_UNALIGNED_DATA";

        case CUFFT_INCOMPLETE_PARAMETER_LIST:
        return "Missing parameters in call";

        case CUFFT_INVALID_DEVICE:
        return "Execution of a plan was on different GPU than plan creation";

        case CUFFT_PARSE_ERROR:
        return "Internal plan database error";

        case CUFFT_NO_WORKSPACE:
        return "No workspace has been provided prior to plan execution";

        case CUFFT_NOT_IMPLEMENTED:
        return "CUFFT_NOT_IMPLEMENTED";

        case CUFFT_LICENSE_ERROR:
        return "CUFFT_LICENSE_ERROR";
    }

    return "<unknown>";
}

// check cuda runtime calls
bool cudaCheck( cudaError_t err )
{
    if ( err != cudaSuccess )
    {
        cudaDeviceSynchronize();
        // report the error that was actually returned, not the (possibly
        // already cleared) last error
        cerr << cudaGetErrorString( err ) << endl;
        return false;
    }

    return true;
}

// check cuFFT calls
bool cufftCheck( cufftResult_t err )
{
    if ( err != CUFFT_SUCCESS )
    {
        cerr << _cufftGetErrorEnum( err ) << endl;
        return false;
    }

    return true;
}

Next, we define a simple cuFFT processor which manages its resources (device memory and cuFFT plans):

class CCuFFT_Processor
{
    vector<cufftHandle> _plans;
    cufftComplex *_data;
    size_t _data_bytes;

    // Release resources
    bool ReleaseAll();
    bool ReleaseMemory();
    bool ReleasePlans();

public:

    CCuFFT_Processor() :
    _data( NULL ),
    _data_bytes( 0 )
    {
        _plans.reserve( 32 );
        _plans.clear();
    }

    ~CCuFFT_Processor()
    {
        ReleaseAll();
    }

    bool Run();
    bool Alloc( size_t data_len, size_t batch_len );
};

Here is how we are going to release resources:

bool CCuFFT_Processor::ReleaseMemory()
{
    bool chk = true;

    if ( _data != NULL )
    {
        chk         = cudaCheck( cudaFree( _data ) );
        _data       = NULL;
        _data_bytes = 0;
    }

    return chk;
}

bool CCuFFT_Processor::ReleasePlans()
{
    bool chk = true;

    for ( auto & p : _plans )
        chk = chk && cufftCheck( cufftDestroy( p ) );

    _plans.clear();

    return chk;
}

bool CCuFFT_Processor::ReleaseAll()
{
    bool chk = true;

    chk = chk && cudaCheck( cudaDeviceSynchronize() );
    chk = chk && ReleaseMemory();
    chk = chk && ReleasePlans();
    chk = chk && cudaCheck( cudaDeviceReset() );

    return chk;
}

Here is the implementation of the main functionality:

bool CCuFFT_Processor::Alloc( size_t data_len, size_t batch_len )
{
    bool   chk   = true;
    size_t bytes = sizeof( cufftComplex ) * data_len * batch_len;

    // CUDA resources

    if ( _data_bytes < bytes )
        chk = chk && ReleaseMemory();

    if ( _data == NULL )
    {
        chk = chk && cudaCheck( cudaMalloc( (void **)&_data, bytes ) );

        // record the size only if the allocation actually succeeded
        if ( chk )
            _data_bytes = bytes;
    }

    // cuFFT resources

    chk = chk && ReleasePlans();

    for ( size_t b = 1; chk && ( b <= batch_len ); b *= 2 )
    {
        cufftHandle new_plan;

        chk = cufftCheck(
            cufftPlan1d( &new_plan, int(data_len), CUFFT_C2C, int(b) ) );

        if ( chk )
            _plans.push_back( new_plan );
    }

    if ( !chk )
        ReleaseAll();

    return chk;
}

bool CCuFFT_Processor::Run()
{
    bool chk = true;

    chk = cufftCheck(
        cufftExecC2C( *_plans.rbegin(), _data, _data, CUFFT_FORWARD ) );

    if ( !chk )
        ReleaseAll();

    chk = chk && cudaCheck( cudaDeviceSynchronize() );

    return chk;
}
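As an aside, one way to lower the chance of hitting CUFFT_ALLOC_FAILED in the first place is to ask cuFFT for a workspace estimate before creating a plan and compare it with the free device memory. Below is a minimal sketch of that idea; the helper name WouldPlanFit is mine and is not part of the processor above, and since the estimate is only approximate, the result of cufftPlan1d itself must still be checked:

// Rough pre-check (illustrative only): compare cuFFT's estimated workspace
// for a C2C plan against the currently free device memory.
bool WouldPlanFit( size_t data_len, size_t batch_len )
{
    size_t work_bytes = 0;

    if ( !cufftCheck( cufftEstimate1d( int(data_len), CUFFT_C2C,
                                       int(batch_len), &work_bytes ) ) )
        return false;

    size_t free_bytes = 0, total_bytes = 0;

    if ( !cudaCheck( cudaMemGetInfo( &free_bytes, &total_bytes ) ) )
        return false;

    return work_bytes <= free_bytes;
}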

Finally, the program:

int main()
{
    size_t batch  = 1 << 5;
    size_t length = 1 << 21;

    CCuFFT_Processor proc;

    // Normal run
    if ( proc.Alloc( length, batch ) )
        proc.Run();

    // Run with error
    length *= 4;

    if ( proc.Alloc( length, batch ) )
        proc.Run();

    // Normal run : check recovery
    length /= 4;

    if ( proc.Alloc( length, batch ) )
        proc.Run();

    return EXIT_SUCCESS;
}

If I use a small length = 1 << 18, then no errors occur. However, for the large length = 1 << 21, two errors appear:

cuFFT failed to allocate GPU or CPU memory
Failed to execute an FFT on the GPU

The first error is expected; we caused it intentionally. But the second one is not. Although the device was reset and new resources were successfully allocated, cuFFT fails to execute the FFT.

I use a GTX 970. I have tried all combinations of CUDA 6.5, CUDA 7.5, 32-bit and 64-bit platforms, etc., but without success.
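For completeness, one extra diagnostic that could be added (a sketch, not something the program above does) is to query the CUDA runtime's own error state right after the failed cufftExecC2C, to see whether the runtime is also reporting a problem:

// Diagnostic helper (illustrative only): report and clear any pending
// CUDA runtime error, e.g. right after a failed cufftExecC2C in Run().
void ReportPendingCudaError( const char *where )
{
    cudaError_t last = cudaGetLastError();   // also clears the non-sticky error state

    if ( last != cudaSuccess )
        cerr << where << ": CUDA runtime reports " << cudaGetErrorString( last ) << endl;
}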

This was apparently a problem limited to the out-of-memory error recovery behaviour of older versions of cuFFT, and was rectified during the CUDA 8 release cycle. If (6 years later) you are still using a pre-CUDA 8 version of cuFFT, please update to something more modern and this issue will be resolved.
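As a quick way to confirm which cuFFT and CUDA runtime a binary actually links against, the versions can be queried at start-up. A small standalone sketch follows; the >= 8000 threshold corresponds to CUDA 8.0 in the runtime's 1000 * major + 10 * minor encoding:

#include <cstdio>
#include "cuda_runtime.h"
#include "cufft.h"

int main()
{
    int runtime_version = 0;
    int cufft_version   = 0;

    cudaRuntimeGetVersion( &runtime_version );
    cufftGetVersion( &cufft_version );

    std::printf( "CUDA runtime: %d, cuFFT: %d\n", runtime_version, cufft_version );

    if ( runtime_version < 8000 )
        std::printf( "Pre-CUDA 8 runtime: the recovery problem described above may apply.\n" );

    return 0;
}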

[answer assembled from comments and added as a community wiki entry to get the question off the unanswered list for the CUDA and cuFFT tags]

