[英]Thrust+boost code compilation error
I have a strange problem which I can't solve. 我有一个我无法解决的奇怪问题。 It's connected with Boost + Thrust code. 它与 Boost + Thrust 代码有关。
Code: 码:
#include <boost/config/compiler/nvcc.hpp>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/copy.h>
#include <thrust/sequence.h>
#include <thrust/random.h>
#include <thrust/generate.h>
#include <thrust/detail/type_traits.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <common/inc/helper_cuda.h>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/operation.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/generate.hpp>
#include <boost/compute/algorithm/generate_n.hpp>
#include <algorithm>
#include <time.h>
#include <limits.h>
#include <algorithm>
using namespace boost::numeric::ublas;
using namespace boost::random;
using namespace boost::compute;
// Test program from the question: multiplies two N x N float matrices once with
// Boost.uBLAS on the host and twice through cublasSgemm, then counts how many
// elements of the cuBLAS results differ from the uBLAS result.
// argc/argv are forwarded to findCudaDevice; the function always returns 0.
int main(int argc, char **argv)
{
// NOTE(review): N is an int, so every N*N below is evaluated in int;
// 100000 * 100000 = 10^10 does not fit in a 32-bit int.
int N = 100000;
unbounded_array<float> lineMatrix1(N*N);
unbounded_array<float> lineMatrix2(N*N);
// NOTE(review): generate_n is unqualified; with the using-directives above it
// may resolve to boost::compute::generate_n rather than std::generate_n - confirm.
// The lambda does integer arithmetic only (rand() and RAND_MAX are integers),
// so it yields whole numbers, not fractional floats.
generate_n(lineMatrix1.begin(), N*N, []() { return (10 * rand() / RAND_MAX); });
generate_n(lineMatrix2.begin(), N*N, []() { return (10 * rand() / RAND_MAX); });
matrix<float> matrix1(N, N, lineMatrix1);
matrix<float> matrix2(N, N, lineMatrix2);
matrix<float> zeroMatrix(N, N, 0);
matrix<float> zeroMatrix2(N, N, 0);
// Host reference product computed with Boost.uBLAS.
// NOTE(review): `auto` presumably captures a uBLAS expression object here
// rather than an evaluated matrix - confirm.
auto matrix3 = prod(matrix1, matrix2);
// End of the host reference computation.
// cuBLAS computation starts here.
findCudaDevice(argc, (const char **)argv);
cublasHandle_t handle;
cublasCreate(&handle);
float alpha = 1.0f;
float beta = 0.0f;
// First attempt: passes iterators into the uBLAS matrices' own storage.
// NOTE(review): that storage is host memory, while cublasSgemm is documented
// to take device pointers - confirm this call can work at all.
auto result = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, matrix1.data().cbegin(), N, matrix2.data().cbegin(), N, &beta, zeroMatrix.data().begin(), N);
cudaDeviceSynchronize();
// Second attempt: copy the operands into Thrust device vectors and pass their
// raw device pointers to cublasSgemm.
thrust::device_vector<float> deviceMatrix1(N*N);
thrust::device_vector<float> deviceMatrix2(N*N);
thrust::device_vector<float> deviceZeroMatrix(N*N, 0);
thrust::copy(matrix1.data().cbegin(), matrix1.data().cend(), deviceMatrix1.begin());
thrust::copy(matrix2.data().cbegin(), matrix2.data().cend(), deviceMatrix2.begin());
auto result2 = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, N, N, N, &alpha, deviceMatrix1.data().get(), N, deviceMatrix2.data().get(), N, &beta, deviceZeroMatrix.data().get(), N);
cudaDeviceSynchronize();
// Copy the device result back into zeroMatrix2's storage.
thrust::copy(deviceZeroMatrix.cbegin(), deviceZeroMatrix.cend(), zeroMatrix2.data().begin());
// Print the status values returned by the two cublasSgemm calls.
std::cout << result << std::endl;
std::cout << result2 << std::endl;
// End of the cuBLAS computation.
// Count the elements where each cuBLAS result disagrees with the host result.
float eps = 0.00001;
int differCount1 = 0;
int differCount2 = 0;
for (int i = 0; i < matrix3.size1(); i++)
{
for (int j = 0; j < matrix3.size2(); j++)
{
// NOTE(review): `a != b` yields a bool, so std::abs(...) is 0 or 1 and the
// test degenerates to exact inequality; `-` was presumably intended.
if (std::abs(matrix3(i, j) != zeroMatrix(i, j)) > eps)
differCount1++;
if (std::abs(matrix3(i, j) != zeroMatrix2(i, j)) > eps)
differCount2++;
}
}
std::cout << differCount1 << std::endl;
std::cout << differCount2 << std::endl;
// Block on one character of input before returning.
char c;
std::cin >> c;
return 0;
}
This file has name 'myFirstMatrixTest.cu'. 此文件的名称为“myFirstMatrixTest.cu”。
So, I get these compiler errors: 所以，我遇到了这些编译错误:
MSB3721 exit from command ""C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.2\\bin\\nvcc.exe" -gencode=arch=compute_30,code=\\"sm_30,compute_30\\" -gencode=arch=compute_35,code=\\"sm_35,compute_35\\" -gencode=arch=compute_37,code=\\"sm_37,compute_37\\" -gencode=arch=compute_50,code=\\"sm_50,compute_50\\" -gencode=arch=compute_52,code=\\"sm_52,compute_52\\" -gencode=arch=compute_60,code=\\"sm_60,compute_60\\" -gencode=arch=compute_61,code=\\"sm_61,compute_61\\" -gencode=arch=compute_70,code=\\"sm_70,compute_70\\" --use-local-env -ccbin "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Tools\\MSVC\\14.14.26428\\bin\\HostX86\\x64" -x cu -rdc=true -I./ -I../common/inc -I../../common/inc -I/common/inc -I../ -I./ -I"C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.2/include" -I../../common/inc -I"C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.2\\include" -G --keep-dir x64\\Debug -maxrregcount=0 --machine 64 --compile -cudart static -Xcompiler "/wd 4819" -g -DWIN32 -DWIN32 -D_MBCS -D MSB3721退出命令“”C:\\ Program Files \\ NVIDIA GPU Computing Toolkit \\ CUDA \\ v9.2 \\ bin \\ nvcc.exe“-gencode = arch = compute_30,code = \\”sm_30,compute_30 \\“ - gencode = arch = compute_35,code = \\“sm_35,compute_35 \\” - gencode = arch = compute_37,code = \\“sm_37,compute_37 \\” - gencode = arch = compute_50,code = \\“sm_50,compute_50 \\” - gencode = arch = compute_52, code = \\“sm_52,compute_52 \\” - gencode = arch = compute_60,code = \\“sm_60,compute_60 \\” - gencode = arch = compute_61,code = \\“sm_61,compute_61 \\” - gencode = arch = compute_70,code = \\“sm_70,compute_70 \\” - use-local-env -ccbin“C:\\ Program Files(x86)\\ Microsoft Visual Studio \\ 2017 \\ Community \\ VC \\ Tools \\ MSVC \\ 14.14.26428 \\ bin \\ HostX86 \\ x64” -x cu -rdc = true -I./ -I ../common/inc-I ../../ common / inc -I / common / inc -I ../ -I./ -I“C: \\ Program Files \\ NVIDIA GPU Computing Toolkit \\ CUDA \\ v9.2 / include“-I ../../ common / inc -I”C:\\ 
Program Files \\ NVIDIA GPU Computing Toolkit \\ CUDA \\ v9.2 \\ include“ -G --keep-dir x64 \\ Debug -maxrregcount = 0 --machine 64 --compile -cudart static -Xcompiler“/ wd 4819”-g -DWIN32 -DWIN32 -D_MBCS -D _MBCS -Xcompiler "/EHsc /W3 /nologo /Od /FS /Zi /RTC1 /MTd " -o x64/Debug/MyFirstMatrixTest.cu.obj "C:\\User Root\\Repository\\CUDA Projects\\MatrixMultiplicationThrust\\MyFirstMatrixTest.cu"" with code "2". _MBCS -Xcompiler“/ EHsc / W3 / nologo / Od / FS / Zi / RTC1 / MTd”-o x64 / Debug / MyFirstMatrixTest.cu.obj“C:\\ User Root \\ Repository \\ CUDA Projects \\ MatrixMultiplicationThrust \\ MyFirstMatrixTest.cu” “用代码”2“。 MyFirstMatrixTest C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\Common7\\IDE\\VC\\VCTargets\\BuildCustomizations\\CUDA 9.2.targets 707 MyFirstMatrixTest C:\\ Program Files(x86)\\ Microsoft Visual Studio \\ 2017 \\ Community \\ Common7 \\ IDE \\ VC \\ VCTargets \\ BuildCustomizations \\ CUDA 9.2.targets 707
and this: 和这个:
Fatal Error C1012 unmatched parenthesis : missing character ")" MyFirstMatrixTest c:\\local\\boost\\preprocessor\\slot\\detail\\shared.hpp 27 致命错误C1012不匹配的括号:缺少字符“)”MyFirstMatrixTest c:\\ local \\ boost \\ preprocessor \\ slot \\ detail \\ shared.hpp 27
Why could this error occur? 为什么会出现此错误?
Thank you. 谢谢。
Well, the first problem is 嗯,第一个问题是
int N = 100000;
So N^2 = 10,000,000,000... (will never fit in an int
). 所以N ^ 2 = 10,000,000,000 ......(绝不适合int
)。 That is 10G*4 bytes(float) = 40 GBytes of data. 即10G * 4字节(浮点)= 40 GB的数据。 For me that throws a memory exception. 对我来说,抛出一个内存异常。
The next problem I had was with the combination of unbounded_array
and generate_n
. 我遇到的下一个问题是unbounded_array
和generate_n
的组合。 It just didn't work. 就是无法工作。 But since you're using Thrust, use the Thrust types and algorithms (I'm not sure why Thrust has its own types to replace the STL ones, but whatever). 但是因为你正在使用Thrust,所以使用Thrust类型和算法(我不知道为什么Thrust有自己的类型来替换STL,但无论如何)。
I'm using Visual Studio 2017 v15.7 in 2015 mode (else I get a not supported error) with Cuda v9.2 and Boost 1.67.0. 我使用Cuda v9.2和Boost 1.67.0在2015模式下使用Visual Studio 2017 v15.7(否则我得到一个不支持的错误)。
I modified your code until it compiles correctly: (Note the correction in the randomizer functor, it was first only generating integers and casting them to floats) 我修改了你的代码,直到它正确编译:(注意随机函数仿函数中的修正,它首先只生成整数并将它们转换为浮点数)
#include <boost/config/compiler/nvcc.hpp>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <thrust/generate.h>
#include <thrust/inner_product.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#pragma comment(lib,"cublas.lib")
#include <helper_cuda.h>
#include <boost/numeric/ublas/matrix.hpp>
//#include <boost/numeric/ublas/io.hpp>
#include <cmath>
#include <functional>
#include <iostream>
#include <iterator>
#include <random>
using boost::numeric::ublas::matrix;
int main(int argc, char **argv)
{
constexpr size_t N = 100;
constexpr size_t NN = N * N;
thrust::host_vector<float> lineMatrix1; lineMatrix1.reserve(NN);
thrust::host_vector<float> lineMatrix2; lineMatrix2.reserve(NN);
{
std::random_device rd; //Will be used to obtain a seed for the random number engine
std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd()
std::uniform_real_distribution<float> dis(0.0f, 10.0f);
auto genRnd = [&]() { return dis(gen); };
thrust::generate_n(std::back_inserter(lineMatrix1), NN, genRnd);
thrust::generate_n(std::back_inserter(lineMatrix2), NN, genRnd);
}
matrix<float> matrix1(N, N);
thrust::copy_n(std::cbegin(lineMatrix1), NN, std::begin(matrix1.begin1()));
//std::cout << "Matrix 1:\n" << matrix1 << std::endl;
matrix<float> matrix2(N, N);
thrust::copy_n(std::cbegin(lineMatrix2), NN, std::begin(matrix2.begin1()));
//std::cout << "Matrix 2:\n" << matrix2 << std::endl;
//auto matrix3 = prod(matrix1, matrix2);
auto matrix3 = trans(prod(trans(matrix1), trans(matrix2)));
//std::cout << "Matrix 3:\n" << matrix3 << std::endl;
thrust::host_vector<float> hostResult; hostResult.reserve(NN);
for (auto rowIt = matrix3.cbegin1(); rowIt != matrix3.cend1(); rowIt++)
for (const auto& element : rowIt)
hostResult.push_back(element);
std::cout << "Host Result:\n";
for (const auto& el : hostResult) std::cout << el << " ";
std::cout << std::endl;
//////boost single core computation finish
//////thrust computation start
findCudaDevice(argc, (const char **)argv);
cublasHandle_t handle;
cublasCreate(&handle);
const float alpha = 1.0f;
const float beta = 0.0f;
thrust::device_vector<float> deviceMatrix1; deviceMatrix1.reserve(NN);
thrust::copy_n(std::cbegin(lineMatrix1), NN, std::back_inserter(deviceMatrix1));
thrust::device_vector<float> deviceMatrix2; deviceMatrix2.reserve(NN);
thrust::copy_n(std::cbegin(lineMatrix2), NN, std::back_inserter(deviceMatrix2));
thrust::device_vector<float> deviceZeroMatrix(NN,0);
auto result2 = cublasSgemm(handle,
CUBLAS_OP_N, CUBLAS_OP_N, N, N, N,
&alpha,
deviceMatrix1.data().get(), N,
deviceMatrix2.data().get(), N,
&beta,
deviceZeroMatrix.data().get(), N);
cudaDeviceSynchronize();
cublasDestroy(handle);
thrust::host_vector<float> deviceResult; deviceResult.reserve(NN);
thrust::copy_n(std::cbegin(deviceZeroMatrix), NN, std::back_inserter(deviceResult));
std::cout << "Device Result:\n";
for (const auto& el : deviceResult) std::cout << el << " ";
std::cout << std::endl;
//////thrust computation finish
auto accError = thrust::inner_product(std::cbegin(hostResult), std::cend(hostResult), std::cbegin(deviceResult), 0.0f, std::plus<float>(),
[](auto val1, auto val2) { return std::abs(val1 - val2); });
std::cout << "Accumulated error: " << accError << std::endl;
std::cout << "Average error: " << accError/NN << std::endl;
std::cin.ignore();
return 0;
}
edit: Fixed the code. 编辑:修复了代码。 The ublas matrix stores its elements differently than the vector does, so I had to transpose the matrices and the result. ublas矩阵存储的矩阵与矢量不同,所以我不得不转置矩阵和结果。 Furthermore, it turned out to be difficult to copy the ublas matrix back to a vector. 此外,事实证明难以将ublas矩阵复制回矢量。
edit2: compilation parameters edit2:编译参数
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\bin\nvcc.exe" -gencode=arch=compute_30,code=\"sm_30,compute_30\" --use-local-env -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64" -x cu -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include" -G --keep-dir x64\Debug -maxrregcount=0 --machine 64 --compile -cudart static -g -DWIN32 -DWIN64 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /FS /Zi /RTC1 /MDd " -o x64\Debug\kernel.cu.obj "C:\Cpp\Cuda\SoHelp2\kernel.cu"
你正在使用lambdas - 将'--std=c++11'选项提供给nvcc。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.