mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 11:19:02 +08:00
`__CUDA_ARCH__` isn't always defined, so avoid relying on it too much when figuring out which implementation to use for reductions. Instead, rely on the device to tell us which hardware version we're running on.
This commit is contained in:
parent
819d0cea1b
commit
a20b58845f
@ -336,11 +336,9 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||
#elif __CUDA_ARCH__ >= 300
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = false;
|
||||
#endif
|
||||
|
||||
template <typename OutputType>
|
||||
@ -619,11 +617,9 @@ struct InnerReducer<Self, Op, GpuDevice> {
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
|
||||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
|
||||
#elif __CUDA_ARCH__ >= 300
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = false;
|
||||
#endif
|
||||
|
||||
template <typename OutputType>
|
||||
@ -678,12 +674,8 @@ struct OuterReducer<Self, Op, GpuDevice> {
|
||||
// Unfortunately nvidia doesn't support well exotic types such as complex,
|
||||
// so reduce the scope of the optimized version of the code to the simple case
|
||||
// of floats.
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
#else
|
||||
static const bool HasOptimizedImplementation = false;
|
||||
#endif
|
||||
|
||||
template <typename Device, typename OutputType>
|
||||
static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user