diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 906160d37..ad0959979 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -403,6 +403,16 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) else() set(CUDA_PROPAGATE_HOST_FLAGS OFF) set(NVCC_ARCH_FLAGS) + # Define an -arch=sm_XX, otherwise if the GPU does not exactly match one of + # those in the arch list for -gencode, the kernels will fail to run with + # cudaErrorNoKernelImageForDevice + # This can happen with newer cards (e.g. sm_75) and compiling with older + # versions of nvcc (e.g. 9.2) that do not support their specific arch. + list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE) + if(EIGEN_CUDA_COMPUTE_ARCH_SIZE) + list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT) + set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}") + endif() foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH) string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") endforeach() diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index a962e3f6c..8aa524d54 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -303,6 +303,16 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) else() set(CUDA_PROPAGATE_HOST_FLAGS OFF) set(NVCC_ARCH_FLAGS) + # Define an -arch=sm_XX, otherwise if the GPU does not exactly match one of + # those in the arch list for -gencode, the kernels will fail to run with + # cudaErrorNoKernelImageForDevice + # This can happen with newer cards (e.g. sm_75) and compiling with older + # versions of nvcc (e.g. 9.2) that do not support their specific arch. + list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE) + if(EIGEN_CUDA_COMPUTE_ARCH_SIZE) + list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT) + set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}") + endif() foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH) string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") endforeach()