简体   繁体   English

cudaMemGetInfo在GTX 690的两个设备上返回相同数量的可用内存

[英]cudaMemGetInfo returns same amount of free memory on both devices of GTX 690

I have run into problems with Geforce GTX 690 while trying to track down the memory usage. 我在尝试跟踪内存使用时遇到了Geforce GTX 690问题。 A simple test program: 一个简单的测试程序:

// Checks that a device allocation made while device 0 is current leaves the
// two devices reporting different amounts of free memory.
//
// Every CUDA runtime call is verified with BOOST_CHECK_EQUAL so that a failing
// call is reported at the line where it happens instead of silently leaving
// stale values in the mem_free_* / mem_tot_* variables. The non-fatal CHECK
// form is used on purpose: the test keeps running and the host buffer is
// always released at the end.
BOOST_AUTO_TEST_CASE(cudaMemoryTest) {

    size_t mem_tot_0 = 0;
    size_t mem_free_0 = 0;
    size_t mem_tot_1 = 0;
    size_t mem_free_1 = 0;

    const unsigned int mem_size = 100*1000000;
    // Byte count computed once in size_t so it cannot wrap in unsigned int.
    const size_t mem_bytes = static_cast<size_t>(mem_size) * sizeof(float);

    // Host-side source buffer, zero-filled.
    float* h_P = new float[mem_size];
    for(size_t i = 0; i < mem_size; i++) {
        h_P[i] = 0.f;
    }

    // Baseline: free memory on each device right after a reset.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory before copy dev 0: "<<mem_free_0<<std::endl;

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory before copy dev 1: "<<mem_free_1<<std::endl;

    // Allocate and fill a buffer while device 0 is the current device.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    float* P = 0;  // stays null if the allocation fails
    const cudaError_t alloc_err = cudaMalloc((void**)&P, mem_bytes);
    BOOST_CHECK_EQUAL(alloc_err, cudaSuccess);
    if(alloc_err == cudaSuccess) {
        BOOST_CHECK_EQUAL(cudaMemcpy((void*)P, h_P, mem_bytes, cudaMemcpyHostToDevice), cudaSuccess);
    }

    // Query both devices again after the allocation.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after copy  dev 0: "<<mem_free_0<<std::endl;

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after copy dev 1: "<<mem_free_1<<std::endl;

    BOOST_CHECK(mem_free_0 != mem_free_1);

    // Catch anything the per-call checks did not surface, and say what it was.
    const cudaError_t err = cudaGetLastError();
    BOOST_CHECK_EQUAL(err, cudaSuccess);
    if(err!=cudaSuccess)
        std::cout<<"an error occurred: "<<cudaGetErrorString(err)<<std::endl;

    // Release the device buffer with device 0 current again.
    // NOTE(review): destroyMem is defined outside this snippet; presumably it
    // frees the device allocation - confirm against its definition.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    if(alloc_err == cudaSuccess) {
        destroyMem(P);
    }
    delete [] h_P;
}

The test prints out: 测试输出:

1>  Free memory before copy dev 0: 1733173248
1>  Free memory before copy dev 1: 1688424448
1>  Free memory after copy  dev 0: 1289940992
1>  Free memory after copy dev 1: 1289940992     
CudaUtilsTest.cpp(47): error in "cudaMemoryTest": check mem_free_0 != mem_free_1 failed

The problem is that after the allocation the amount of free memory on device 1 is exactly the same as on device 0, which shouldn't be the case; hence the problem has to be in cudaMemGetInfo and/or cudaSetDevice. 问题在于,分配后,设备1上的可用内存量与设备0上的完全相同,而这本不应该发生,因此问题必定出在cudaMemGetInfo和/或cudaSetDevice中。 Has anyone run into the same problem, or is there something else fundamentally wrong in the test that someone can point out? 是否有人遇到过相同的问题,或者有人能指出测试中存在的其他根本性错误吗?

Running the code on Windows 7, Visual studio 2010, Cuda SDK 5.0, compiling with code generation: compute_30,sm_30 在Windows 7,Visual Studio 2010,Cuda SDK 5.0上运行代码,并通过以下代码生成进行编译:compute_30,sm_30

EDIT 22.4.2013 编辑22.4.2013

I continued experimenting with this issue and it seems that cudaSetDevice works fine, as can be verified from the result of the cudaGetDevice calls. 我继续尝试此问题,似乎cudaSetDevice可以正常工作,这可以通过cudaGetDevice调用的结果进行验证。 I added a reset of device 0 after the memory allocation test and it seems that the size of free memory returned by cudaMemGetInfo is again the same for both devices. 在内存分配测试之后,我添加了设备0的重置,并且似乎cudaMemGetInfo返回的可用内存的大小对于这两个设备再次相同。 I have checked all cudaError_t return values in my own code and all function calls return cudaSuccess. 我已经在自己的代码中检查了所有cudaError_t返回值,并且所有函数调用都返回cudaSuccess。 Has anyone run into similar problems with the GTX 690 with the setup described above? 是否有人在上述配置下遇到过GTX 690的类似问题?

Most recent test code: 最新的测试代码:

// Extended version of the free-memory experiment. It records which device is
// current at every step, checks that an allocation made while device 0 is
// current leaves the two devices reporting different free memory, and then
// resets only device 0 and queries both devices once more.
//
// Every CUDA runtime call is verified with BOOST_CHECK_EQUAL so that a failing
// call is reported where it happens instead of silently leaving stale values
// in the variables that are printed and compared. The non-fatal CHECK form is
// used so the host buffer is always released at the end.
BOOST_AUTO_TEST_CASE(cudaMemoryTest) {
    size_t mem_tot_0 = 0;
    size_t mem_free_0 = 0;
    size_t mem_tot_1 = 0;
    size_t mem_free_1 = 0;

    int device_num = 0;

    const unsigned int mem_size = 100*1000000;
    // Byte count computed once in size_t so it cannot wrap in unsigned int.
    const size_t mem_bytes = static_cast<size_t>(mem_size) * sizeof(float);

    // Host-side source buffer, zero-filled.
    float* h_P = new float[mem_size];
    for(size_t i = 0; i < mem_size; i++) {
        h_P[i] = 0.f;
    }

    // Baseline: free memory on each device right after a reset.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory before copy dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory before copy dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    // Allocate and fill a buffer while device 0 is current, then query device 0.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    float* P = 0;  // stays null if the allocation fails
    const cudaError_t alloc_err = cudaMalloc((void**)&P, mem_bytes);
    BOOST_CHECK_EQUAL(alloc_err, cudaSuccess);
    if(alloc_err == cudaSuccess) {
        BOOST_CHECK_EQUAL(cudaMemcpy((void*)P, h_P, mem_bytes, cudaMemcpyHostToDevice), cudaSuccess);
    }
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after copy  dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after copy dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK(mem_free_0 != mem_free_1);

    // Catch anything the per-call checks did not surface, and say what it was.
    const cudaError_t err = cudaGetLastError();
    BOOST_CHECK_EQUAL(err, cudaSuccess);
    if(err!=cudaSuccess)
        std::cout<<"an error occurred: "<<cudaGetErrorString(err)<<std::endl;

    // Reset only device 0 and check both.
    // P is never passed to cudaFree; the reset of device 0 below is the only
    // thing that releases it, so P must not be used after this point.
    BOOST_CHECK_EQUAL(cudaSetDevice(0), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaDeviceReset(), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_0, &mem_tot_0), cudaSuccess);
    std::cout<<"Free memory after second reset of device 0, dev 0: "<<mem_free_0<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    BOOST_CHECK_EQUAL(cudaSetDevice(1), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaGetDevice(&device_num), cudaSuccess);
    BOOST_CHECK_EQUAL(cudaMemGetInfo(&mem_free_1, &mem_tot_1), cudaSuccess);
    std::cout<<"Free memory after second device reset of device 0, dev 1: "<<mem_free_1<<" Device: "<<device_num<<std::endl;
    BOOST_CHECK_EQUAL(cudaDeviceSynchronize(), cudaSuccess);

    delete [] h_P;
}

Test output: 测试输出:

1>  Free memory before copy dev 0: 1794379776 Device: 0
1>  Free memory before copy dev 1: 1751728128 Device: 1
1>  Free memory after copy  dev 0: 1351696384 Device: 0
1>  Free memory after copy dev 1: 1351696384 Device: 1
1>  CudaUtilsTest.cpp(353): error in "cudaMemoryTest": check mem_free_0 != mem_free_1 failed
1>  Free memory after second reset of device 0, dev 0: 1751728128 Device: 0
1>  Free memory after second device reset of device 0, dev 1: 1751728128 Device: 1

This was solved by changing the WDDM driver settings as follows: 通过如下更改WDDM驱动程序设置解决了此问题:

Switch to "Disable multi-GPU mode" in the NVIDIA Control Panel under "3D Settings" -> "Configure Multi-GPU, Surround, PhysX". 在NVIDIA控制面板的“3D设置”->“配置多GPU、Surround、PhysX”中切换到“禁用多GPU模式”。

[This answer added from comments as a community wiki entry to get the question off the unanswered queue for the CUDA tag] [此答案从注释中添加为社区Wiki条目,以使问题摆脱CUDA标签的未答复队列]

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM