Made most of the packet math primitives usable within CUDA kernels when compiling with clang

This commit is contained in:
Benoit Steiner 2017-02-28 17:05:28 -08:00
parent c92406d613
commit 7b61944669
2 changed files with 4 additions and 4 deletions

View File

@ -167,10 +167,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const d
return make_double2(from[0], from[1]); return make_double2(from[0], from[1]);
} }
template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) { template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
return make_float4(from[0], from[0], from[1], from[1]); return make_float4(from[0], from[0], from[1], from[1]);
} }
template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) { template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
return make_double2(from[0], from[0]); return make_double2(from[0], from[0]);
} }

View File

@ -542,8 +542,8 @@
// - static is not very good because it prevents definitions from different object files from being merged. // - static is not very good because it prevents definitions from different object files from being merged.
// So static causes the resulting linked executable to be bloated with multiple copies of the same function. // So static causes the resulting linked executable to be bloated with multiple copies of the same function.
// - inline is not perfect either as it gives the compiler an unwanted hint toward inlining the function. // - inline is not perfect either as it gives the compiler an unwanted hint toward inlining the function.
#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS #define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline #define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline
#ifdef NDEBUG #ifdef NDEBUG
# ifndef EIGEN_NO_DEBUG # ifndef EIGEN_NO_DEBUG