Fix ODR violations.

This commit is contained in:
Antonio Sánchez 2022-02-04 19:01:07 +00:00 committed by Rasmus Munk Larsen
parent 18b50458b6
commit cafeadffef
5 changed files with 9 additions and 21 deletions

View File

@ -123,7 +123,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
// invoked by NVCC (e.g. on MacOS). The former needs to see both host and device implementation
// of the functions, while the latter can only deal with one of them.
#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
namespace {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
const float& b) {
@ -182,8 +181,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a,
return __longlong_as_double(a < b ? 0xffffffffffffffffull : 0ull);
}
} // namespace
template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
const float4& b) {

View File

@ -194,7 +194,7 @@ struct TensorEvaluator
const Device EIGEN_DEVICE_REF m_device;
};
namespace {
namespace internal {
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
return *address;
@ -221,8 +221,7 @@ T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address
return *address;
}
#endif
}
} // namespace internal
// Default evaluator for rvalues
template<typename Derived, typename Device>
@ -291,7 +290,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data != NULL);
return loadConstant(m_data+index);
return internal::loadConstant(m_data+index);
}
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -316,7 +315,7 @@ struct TensorEvaluator<const Derived, Device>
eigen_assert(m_data != NULL);
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
: m_dims.IndexOfRowMajor(coords);
return loadConstant(m_data+index);
return internal::loadConstant(m_data+index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {

View File

@ -30,8 +30,6 @@ namespace Eigen {
namespace internal {
namespace {
// Note: result is undefined if val == 0
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
@ -137,8 +135,6 @@ namespace {
#endif
}
};
}
template <typename T, bool div_gt_one = false>
struct TensorIntDivisor {
@ -254,7 +250,7 @@ private:
template <typename T, bool div_gt_one>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) {
return divisor.divide(numerator);
}

View File

@ -367,8 +367,9 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
};
namespace internal {
// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
@ -398,7 +399,7 @@ template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index
};
#endif
}
} // namespace internal
// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
@ -509,7 +510,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
}
}
// Use memcpy if it's going to be faster than using the regular evaluation.
const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
const internal::MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {

View File

@ -16,8 +16,6 @@
namespace Eigen {
namespace internal {
namespace {
EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
#if defined(EIGEN_GPU_COMPILE_PHASE)
// We don't support 3d kernels since we currently only use 1 and
@ -45,9 +43,6 @@ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t
return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
}
} // namespace
template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
T RandomToTypeUniform(uint64_t* state, uint64_t stream) {
unsigned rnd = PCG_XSH_RS_generator(state, stream);