mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 20:56:00 +08:00
Only attempt to use the texture path on GPUs when it's supported by CUDA
This commit is contained in:
parent
864318e508
commit
81f9e968fd
@ -109,6 +109,24 @@ struct TensorEvaluator
|
|||||||
const Device& m_device;
|
const Device& m_device;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
T loadConstant(const T* address) {
|
||||||
|
return *address;
|
||||||
|
}
|
||||||
|
// Use the texture cache on CUDA devices whenever possible
|
||||||
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
|
||||||
|
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
float loadConstant(const float* address) {
|
||||||
|
return __ldg(address);
|
||||||
|
}
|
||||||
|
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
double loadConstant(const double* address) {
|
||||||
|
return __ldg(address);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Default evaluator for rvalues
|
// Default evaluator for rvalues
|
||||||
template<typename Derived, typename Device>
|
template<typename Derived, typename Device>
|
||||||
@ -150,11 +168,7 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||||
eigen_assert(m_data);
|
eigen_assert(m_data);
|
||||||
#ifdef __CUDA_ARCH__
|
return loadConstant(m_data+index);
|
||||||
return __ldg(m_data+index);
|
|
||||||
#else
|
|
||||||
return m_data[index];
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
@ -167,11 +181,7 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
eigen_assert(m_data);
|
eigen_assert(m_data);
|
||||||
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
|
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
|
||||||
: m_dims.IndexOfRowMajor(coords);
|
: m_dims.IndexOfRowMajor(coords);
|
||||||
#ifdef __CUDA_ARCH__
|
return loadConstant(m_data+index);
|
||||||
return __ldg(m_data+index);
|
|
||||||
#else
|
|
||||||
return m_data[index];
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
|
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user