I'm on Ubuntu 14.04 with cuda toolkit 6.5, nvidia driver version 340.29. My application registers a pixel buffer from openGL and writes an image to the buffer every loop, copies the PBO to a texture using glTexSubImage2D, and draws the texture. This all works properly until I change my image-generation kernel, then I gdb reports a segmentation fault in cudaGraphicsGLRegisterBuffer. My guess is this is a bug, because the cuda kernel is completely unrelated to cudaGraphicsGLRegisterBuffer, which is called before any processing.
Makefile
CUDA=nvcc
CPP=g++
OUT=out
INC=-I/usr/local/cuda-6.5/include
LINK=-lcudart -lglfw -lGLEW -lGL
FLAGS=-std=gnu++11
CUFLAGS=-std=c++11
all: main.cu GLdisplay.cu
$(CUDA) main.cu GLdisplay.cu -o $(OUT) $(CUFLAGS) $(INC) $(LINK)
clean:
rm ./$(OUT)
add:
git add -A
git status
main.cu
#define GLEW_STATIC
// C++ headers
#include <iostream>
#include <fstream>
#include <cstring>
// openGL headers
#include <GL/glew.h>
#include <GLFW/glfw3.h>
// CUDA headers
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include "GLdisplay.h"
#define WINDOW_WIDTH 640
#define WINDOW_HEIGHT 480
#define TEX_WIDTH 1920
#define TEX_HEIGHT 1080
using std::cout;
using std::cerr;
using std::endl;
using std::string;
using std::ifstream;
GLFWwindow* window;
GLuint vao, vbo, pbo;
GLuint vtx, frg, shaders;
GLuint tex;
uint8_t* cudaPBOptr;
size_t cudaPBOsize;
cudaGraphicsResource_t cuGfxPBO;
string loadTxtFileAsString( string filename )
{
string source;
string buf = "";
ifstream file( filename, std::ios::in );
while( file.good( ) )
{
std::getline( file, buf );
source.append( buf + "\n" );
}
file.close( );
return source;
}
void shaderCompileCheck( void )
{
GLint status;
// vertex
glGetShaderiv( vtx, GL_COMPILE_STATUS, &status );
if( GL_TRUE != status )
{
char buffer[ 512 ];
glGetShaderInfoLog( vtx, 512, NULL, buffer );
cerr << "vtx err | " << buffer << endl;
}
// fragment
glGetShaderiv( frg, GL_COMPILE_STATUS, &status );
if( GL_TRUE != status )
{
char buffer[ 512 ];
glGetShaderInfoLog( frg, 512, NULL, buffer );
cerr << "frg err | " << buffer << endl;
}
}
// added exit on !cudaSuccess
#define cudaErr(err) cudaError( err, __FILE__, __LINE__ )
inline void cudaError( cudaError_t err, const char* file, uint32_t line, bool abort=true )
{
if( cudaSuccess != err )
{
cerr << "[" << file << ":" << line << "] ";
cerr << cudaGetErrorName( err ) << endl; // print the name instead of description
if( abort ) exit( err );
}
}
// added GL error checking
#define glErr( ) glError( glGetError( ), __FILE__, __LINE__ )
inline void glError( GLenum err, const char* file, uint32_t line, bool abort=false )
{
if( GL_NO_ERROR != err )
{
cerr << "[" << file << ":" << line << "] ";
cerr << glewGetErrorString( err ) << endl;
if( abort ) exit( err );
}
}
// main function
int main( int argc, char* argv[] )
{
string vString = loadTxtFileAsString( "vert.glsl" );
string fString = loadTxtFileAsString( "frag.glsl" );
const GLchar* vtxSource = vString.c_str( );
const GLchar* frgSource = fString.c_str( );
/////////////////////////////////////////////////
glfwInit( );
glfwWindowHint( GLFW_CONTEXT_VERSION_MAJOR, 3 );
glfwWindowHint( GLFW_CONTEXT_VERSION_MINOR, 2 );
glfwWindowHint( GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE );
glfwWindowHint( GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE );
glfwWindowHint( GLFW_RESIZABLE, GL_FALSE );
window = glfwCreateWindow( WINDOW_WIDTH, WINDOW_HEIGHT, "CUDA-GL", NULL, NULL );
glfwMakeContextCurrent( window );
glewExperimental = GL_TRUE;
glewInit( );
glErr(); // added GL error checking
cudaErr( cudaSetDevice( 0 ) );
glErr(); // added GL error checking
/////////////////////////////////////////////////
GLfloat vertices[] = {
// X Y U V
-1.0f, 1.0f, 0.0f, 1.0f, // t l
1.0f, 1.0f, 1.0f, 1.0f, // t r
-1.0f, -1.0f, 0.0f, 0.0f, // b l
-1.0f, -1.0f, 0.0f, 0.0f, // b l
1.0f, -1.0f, 1.0f, 0.0f, // b r
1.0f, 1.0f, 1.0f, 1.0f // t r
};
GLbyte pboInit[ TEX_WIDTH * TEX_HEIGHT * 4 ];
memset( pboInit, 127, sizeof( pboInit ) );
/////////////////////////////////////////////////
glGenVertexArrays( 1, &vao );
glBindVertexArray( vao );
glErr(); // added GL error checking
glGenBuffers( 1, &pbo );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo );
glBufferData( GL_PIXEL_UNPACK_BUFFER, TEX_WIDTH * TEX_HEIGHT * 4, pboInit, GL_STREAM_DRAW );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
glErr(); // added GL error checking
glActiveTexture( GL_TEXTURE0 );
glGenTextures( 1, &tex );
glBindTexture( GL_TEXTURE_2D, tex );
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, TEX_WIDTH, TEX_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
glErr(); // added GL error checking
glGenBuffers( 1, &vbo );
glBindBuffer( GL_ARRAY_BUFFER, vbo );
glBufferData( GL_ARRAY_BUFFER, sizeof( vertices ), vertices, GL_STATIC_DRAW );
glErr(); // added GL error checking
/////////////////////////////////////////////////
cudaErr( cudaGraphicsGLRegisterBuffer( &cuGfxPBO, pbo, cudaGraphicsMapFlagsNone ) );
glErr(); // added GL error checking
/////////////////////////////////////////////////
vtx = glCreateShader( GL_VERTEX_SHADER );
glShaderSource( vtx, 1, &vtxSource, NULL );
glCompileShader( vtx );
frg = glCreateShader( GL_FRAGMENT_SHADER );
glShaderSource( frg, 1, &frgSource, NULL );
glCompileShader( frg );
shaderCompileCheck( );
shaders = glCreateProgram( );
glAttachShader( shaders, vtx );
glAttachShader( shaders, frg );
glBindFragDataLocation( shaders, 0, "outColor" );
glLinkProgram( shaders );
glUseProgram( shaders );
/////////////////////////////////////////////////
GLuint posAtt = glGetAttribLocation( shaders, "position" );
glEnableVertexAttribArray( posAtt );
glVertexAttribPointer( posAtt, 2, GL_FLOAT, GL_FALSE, 4 * sizeof( GLfloat ), NULL );
GLuint texAtt = glGetAttribLocation( shaders, "texcoord" );
glEnableVertexAttribArray( texAtt );
glVertexAttribPointer( texAtt, 2, GL_FLOAT, GL_FALSE, 4 * sizeof( GLfloat ), ( void* )( 2 * sizeof( GLfloat ) ) );
/////////////////////////////////////////////////
glClearColor( 0.0f, 0.0f, 0.0f, 1.0f );
while( !glfwWindowShouldClose( window ) )
{
cudaErr( cudaGraphicsMapResources( 1, &cuGfxPBO, 0 ) );
cudaErr( cudaGraphicsResourceGetMappedPointer( ( void** )&cudaPBOptr, &cudaPBOsize, cuGfxPBO ) );
glErr(); // added GL error checking
dim3 gridDim( ( int32_t )ceil( ( float )TEX_WIDTH / 16 ), ( int32_t )ceil( ( float )TEX_HEIGHT / 16 ) );
dim3 blockDim( 16, 16 );
makeImage<<< gridDim, blockDim >>>( cudaPBOptr, TEX_WIDTH, TEX_HEIGHT );
cudaDeviceSynchronize( ); cudaErr( cudaGetLastError( ) );
glErr(); // added GL error checking
cudaErr( cudaGraphicsUnmapResources( 1, &cuGfxPBO, 0 ) );
glErr(); // added GL error checking
/////////////////////////////////////////////////
glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo );
glErr(); // added GL error checking
glTexSubImage2D( GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, NULL );
glErr(); // added GL error checking
glBindBuffer( GL_PIXEL_UNPACK_BUFFER, 0 );
glErr(); // added GL error checking
/////////////////////////////////////////////////
glClear( GL_COLOR_BUFFER_BIT );
glDrawArrays( GL_TRIANGLES, 0, 6 );
glErr(); // added GL error checking
glfwSwapBuffers( window );
glfwPollEvents( );
}
/////////////////////////////////////////////////
glDeleteTextures( 1, &tex );
glDeleteProgram( shaders );
glDeleteShader( frg );
glDeleteShader( vtx );
glDeleteBuffers( 1, &pbo );
glDeleteBuffers( 1, &vbo );
glDeleteVertexArrays( 1, &vao );
return 0;
}
GLdisplay.cu
#include <cuda_runtime.h>
#include <stdint.h>
#include "GLdisplay.h"
__global__ void makeImage( uint8_t* output, uint32_t width, uint32_t height )
{
uint32_t xIdx = threadIdx.x + blockIdx.x * blockDim.x;
uint32_t yIdx = threadIdx.y + blockIdx.y * blockDim.y;
uint32_t oIdx = ( xIdx + yIdx * width ) * 4;
if( xIdx >= width || yIdx >= height ) return;
// segfault occurs if any of the lines below are commented or set to write 0
uint8_t red = ( uint8_t )( 255.0f * ( float )( xIdx + yIdx * width ) / ( width * height ) );
uint8_t gre = ( uint8_t )( 255.0f * ( 1.0f - ( float )( xIdx + yIdx * width ) / ( width * height ) ) );
uint8_t blu = ( uint8_t )( 510.0f * abs( ( float )( xIdx + yIdx * width ) / ( width * height ) ) - 0.5f );
output[ oIdx + 0 ] = red;
output[ oIdx + 1 ] = gre;
output[ oIdx + 2 ] = blu;
output[ oIdx + 3 ] = 255;
}
GLdisplay.h
#ifndef GL_DISPLAY_H
#define GL_DISPLAY_H
__global__ void makeImage( uint8_t*, uint32_t, uint32_t );
#endif
vert.glsl
#version 150
in vec2 position;
in vec2 texcoord;
out vec2 Texcoord;
void main( )
{
Texcoord = texcoord;
gl_Position = vec4( position, 0.0, 1.0 );
}
frag.glsl
#version 150
in vec2 Texcoord;
out vec4 outColor;
uniform sampler2D tex;
void main( )
{
outColor = texture( tex, Texcoord );
}
Your code lacks error condition checking. First and foremost I suggest you check the error code status after each and every OpenGL and CUDA call. I've got a very strong hunch what the problem is and adding the error checking will tell if this is the case.
So: I see that you never properly unbind the PBO from OpenGL before trying to map it to a CUDA pointer. Depending on the path your program takes this may result in the PBO actually not getting mapped, hence giving you an invalid pointer that if accessed, from the CUDA kernel or the host, will result in a segfault.
To pin down the problem check the error status of every OpenGL and CUDA operation. CUDA will tell you if it can't map a OpenGL resource, for whatever reason (like the resource still being bound to a OpenGL functional unit).
Updating to nvidia display driver 346.47 solved my problem completely. For the unsure, run nvidia-smi
in a terminal window, and if you don't see NVIDIA-SMI 346.xx
(xx being an arbitrary number), update with the newest driver available from nvidia. At the time of writing this, CUDA toolkit 6.5 ships with an outdated graphics driver.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.