Call a CUDA runtime function (or launch a kernel); cudaError_t err = cudaGetLastError(); if ( err != cudaSuccess ) { printf("CUDA Error: %s\n", cudaGetErrorString(err)); /* Possibly: exit(-1) if the program cannot continue... */ } |
/*
 * hello: every launched thread prints its block index, the number of
 * blocks in the grid, its thread index and the number of threads per block.
 */
__global__ void hello()
{
    printf("blockIdx.x=%d/%d blocks, threadIdx.x=%d/%d threads\n",
           blockIdx.x, gridDim.x, threadIdx.x, blockDim.x);
}

/*
 * Demonstrates a kernel launch that fails WITHOUT any visible error:
 * the launch configuration is invalid, but since the program never asks
 * the CUDA runtime for the error status, only the CPU message is printed.
 */
int main()
{
    const int numBlocks       = 1;
    const int threadsPerBlock = 1025;  // Error: more than 1024 threads per block !!!

    // The launch itself returns nothing; the failure is not reported here.
    hello<<<numBlocks, threadsPerBlock>>>();

    printf("I am the CPU: Hello World ! \n");

    // Return value deliberately ignored in this demo (no error checking).
    cudaDeviceSynchronize();
    return 0;
}
/home/cs355001/demo/CUDA/1-intro/hello-error Output: I am the CPU: Hello World ! (The kernel launch failed, so the GPU prints nothing, and because the program never checks for errors, no error message is reported either !!!) |
/*
 * hello: every launched thread prints its block index, the number of
 * blocks in the grid, its thread index and the number of threads per block.
 */
__global__ void hello()
{
    printf("blockIdx.x=%d/%d blocks, threadIdx.x=%d/%d threads\n",
           blockIdx.x, gridDim.x, threadIdx.x, blockDim.x);
}

/*
 * Launches hello with an invalid configuration and reports the failure.
 *
 * A kernel launch has no return value, so the launch status must be
 * fetched with cudaGetLastError() right after the launch. Errors that
 * occur while a kernel is executing are reported by the next synchronizing
 * call instead, so the result of cudaDeviceSynchronize() is checked too.
 *
 * Exits with status -1 after printing "CUDA Error: <description>" when
 * either check fails; returns 0 otherwise.
 */
int main()
{
    hello<<< 1, 1025 >>>( );   // Error: more than 1024 threads per block !!!

    cudaError_t err = cudaGetLastError();   // Get (and clear) the launch error code
    if ( err != cudaSuccess )
    {
        printf("CUDA Error: %s\n", cudaGetErrorString(err));
        exit(-1);
    }

    printf("I am the CPU: Hello World ! \n");

    // Wait for the GPU to finish; this is where execution-time errors surface.
    err = cudaDeviceSynchronize();
    if ( err != cudaSuccess )
    {
        printf("CUDA Error: %s\n", cudaGetErrorString(err));
        exit(-1);
    }

    return 0;
}
/home/cs355001/demo/CUDA/1-intro/hello-error2 Output: CUDA Error: invalid configuration argument |