mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-09 06:31:47 +08:00
Fix ODR violations.
(cherry picked from commit cafeadffef2a7ba41f2da5cf34c38068d74499eb)
This commit is contained in:
parent
0cd4719f3e
commit
52e545324e
@ -121,7 +121,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
|
|||||||
// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
|
// invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
|
||||||
// of the functions, while the latter can only deal with one of them.
|
// of the functions, while the latter can only deal with one of them.
|
||||||
#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
|
#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
|
||||||
namespace {
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
|
||||||
const float& b) {
|
const float& b) {
|
||||||
@ -180,8 +179,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a,
|
|||||||
return __longlong_as_double(a < b ? 0xffffffffffffffffull : 0ull);
|
return __longlong_as_double(a < b ? 0xffffffffffffffffull : 0ull);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
|
||||||
const float4& b) {
|
const float4& b) {
|
||||||
|
@ -192,7 +192,7 @@ struct TensorEvaluator
|
|||||||
const Device EIGEN_DEVICE_REF m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace {
|
namespace internal {
|
||||||
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T loadConstant(const T* address) {
|
T loadConstant(const T* address) {
|
||||||
return *address;
|
return *address;
|
||||||
@ -219,8 +219,7 @@ T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address
|
|||||||
return *address;
|
return *address;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
} // namespace internal
|
||||||
|
|
||||||
|
|
||||||
// Default evaluator for rvalues
|
// Default evaluator for rvalues
|
||||||
template<typename Derived, typename Device>
|
template<typename Derived, typename Device>
|
||||||
@ -289,7 +288,7 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||||
eigen_assert(m_data != NULL);
|
eigen_assert(m_data != NULL);
|
||||||
return loadConstant(m_data+index);
|
return internal::loadConstant(m_data+index);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
@ -314,7 +313,7 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
eigen_assert(m_data != NULL);
|
eigen_assert(m_data != NULL);
|
||||||
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
|
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
|
||||||
: m_dims.IndexOfRowMajor(coords);
|
: m_dims.IndexOfRowMajor(coords);
|
||||||
return loadConstant(m_data+index);
|
return internal::loadConstant(m_data+index);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
|
||||||
|
@ -28,8 +28,6 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
// Note: result is undefined if val == 0
|
// Note: result is undefined if val == 0
|
||||||
template <typename T>
|
template <typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -135,8 +133,6 @@ namespace {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T, bool div_gt_one = false>
|
template <typename T, bool div_gt_one = false>
|
||||||
struct TensorIntDivisor {
|
struct TensorIntDivisor {
|
||||||
@ -252,7 +248,7 @@ private:
|
|||||||
|
|
||||||
|
|
||||||
template <typename T, bool div_gt_one>
|
template <typename T, bool div_gt_one>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
|
||||||
return divisor.divide(numerator);
|
return divisor.divide(numerator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -369,8 +369,9 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
// Fixme: figure out the exact threshold
|
// Fixme: figure out the exact threshold
|
||||||
namespace {
|
|
||||||
template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
|
template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
|
||||||
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
|
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
|
||||||
EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
|
EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
|
||||||
@ -400,7 +401,7 @@ template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
} // namespace internal
|
||||||
|
|
||||||
// Eval as rvalue
|
// Eval as rvalue
|
||||||
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
|
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
|
||||||
@ -511,7 +512,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Use memcpy if it's going to be faster than using the regular evaluation.
|
// Use memcpy if it's going to be faster than using the regular evaluation.
|
||||||
const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
|
const internal::MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
|
||||||
if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
|
if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
|
||||||
EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
|
EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
|
||||||
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
|
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
|
||||||
|
@ -14,8 +14,6 @@
|
|||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
|
EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
|
||||||
#if defined(EIGEN_GPU_COMPILE_PHASE)
|
#if defined(EIGEN_GPU_COMPILE_PHASE)
|
||||||
// We don't support 3d kernels since we currently only use 1 and
|
// We don't support 3d kernels since we currently only use 1 and
|
||||||
@ -43,9 +41,6 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t
|
|||||||
return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
|
return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
T RandomToTypeUniform(uint64_t* state, uint64_t stream) {
|
T RandomToTypeUniform(uint64_t* state, uint64_t stream) {
|
||||||
unsigned rnd = PCG_XSH_RS_generator(state, stream);
|
unsigned rnd = PCG_XSH_RS_generator(state, stream);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user