mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 11:19:02 +08:00
Add -arch=<arch> argument for nvcc.
Without this flag, when compiling with nvcc, if the compute architecture of a card does not exactly match any of those listed for `-gencode arch=compute_<arch>,code=sm_<arch>`, then the kernel will fail to run with the error `cudaErrorNoKernelImageForDevice: no kernel image is available for execution on the device`. This can happen, for example, when compiling with an older CUDA version that does not support a newer architecture (e.g. T4 is `sm_75`, but CUDA 9.2 only supports up to `sm_70`). With the `-arch=sm_<arch>` flag (taken from the first entry of `EIGEN_CUDA_COMPUTE_ARCH`), the code will compile and run at the supplied architecture.
This commit is contained in:
parent
846d34384a
commit
de218b471d
@ -403,6 +403,16 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
|
|||||||
else()
|
else()
|
||||||
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||||
set(NVCC_ARCH_FLAGS)
|
set(NVCC_ARCH_FLAGS)
|
||||||
|
# Define an -arch=sm_<arch>, otherwise if GPU does not exactly match one of
|
||||||
|
# those in the arch list for -gencode, the kernels will fail to run with
|
||||||
|
# cudaErrorNoKernelImageForDevice
|
||||||
|
# This can happen with newer cards (e.g. sm_75) and compiling with older
|
||||||
|
# versions of nvcc (e.g. 9.2) that do not support their specific arch.
|
||||||
|
list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE)
|
||||||
|
if(EIGEN_CUDA_COMPUTE_ARCH_SIZE)
|
||||||
|
list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT)
|
||||||
|
set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}")
|
||||||
|
endif()
|
||||||
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
|
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
|
||||||
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
|
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
|
||||||
endforeach()
|
endforeach()
|
||||||
|
@ -303,6 +303,16 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
|
|||||||
else()
|
else()
|
||||||
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||||
set(NVCC_ARCH_FLAGS)
|
set(NVCC_ARCH_FLAGS)
|
||||||
|
# Define an -arch=sm_<arch>, otherwise if GPU does not exactly match one of
|
||||||
|
# those in the arch list for -gencode, the kernels will fail to run with
|
||||||
|
# cudaErrorNoKernelImageForDevice
|
||||||
|
# This can happen with newer cards (e.g. sm_75) and compiling with older
|
||||||
|
# versions of nvcc (e.g. 9.2) that do not support their specific arch.
|
||||||
|
list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE)
|
||||||
|
if(EIGEN_CUDA_COMPUTE_ARCH_SIZE)
|
||||||
|
list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT)
|
||||||
|
set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}")
|
||||||
|
endif()
|
||||||
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
|
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
|
||||||
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
|
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
|
||||||
endforeach()
|
endforeach()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user