Fix for HIP breakage detected on 191210

The following commit introduces compile errors when building Eigen with hipcc:

2918f85ba9

hipcc errors out because it requires the device attribute (EIGEN_DEVICE_FUNC) on the methods within the TensorBlockV2ResourceRequirements struct introduced by the commit above. The fix is to add the device attribute to those methods, as sketched below.
Author:       Deven Desai (2019-12-10 22:14:05 +00:00)
Committed by: Rasmus Munk Larsen
Parent:       2918f85ba9
Commit:       c49f0d851a

2 changed files with 10 additions and 6 deletions
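
For context, here is a minimal standalone sketch of the failure mode and of the fix. It is not the actual Eigen code: the file, struct, kernel, and macro names are made up for illustration, and MY_DEVICE_FUNC stands in for EIGEN_DEVICE_FUNC, which expands to __host__ __device__ when compiling with hipcc.

    // sketch_device_attr.hip.cpp -- hypothetical example, not part of Eigen.
    // Build (assuming a working ROCm install): hipcc sketch_device_attr.hip.cpp -o sketch
    #include <hip/hip_runtime.h>
    #include <cstdio>

    // Stand-in for EIGEN_DEVICE_FUNC, which expands to __host__ __device__ under hipcc.
    #define MY_DEVICE_FUNC __host__ __device__

    struct ResourceRequirementsSketch {
      // Without MY_DEVICE_FUNC this is a host-only function, and hipcc rejects
      // the call from the kernel below (reference to a __host__ function in device code).
      MY_DEVICE_FUNC static size_t merge(size_t lhs_size, size_t rhs_size) {
        return lhs_size > rhs_size ? lhs_size : rhs_size;
      }
    };

    __global__ void merge_kernel(size_t* out) {
      *out = ResourceRequirementsSketch::merge(16, 128);  // needs the device attribute
    }

    int main() {
      size_t* d_out = nullptr;
      hipMalloc(&d_out, sizeof(size_t));
      hipLaunchKernelGGL(merge_kernel, dim3(1), dim3(1), 0, 0, d_out);
      size_t h_out = 0;
      hipMemcpy(&h_out, d_out, sizeof(size_t), hipMemcpyDeviceToHost);
      std::printf("merge(16, 128) on device = %zu\n", h_out);
      hipFree(d_out);
      return 0;
    }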

@@ -105,7 +105,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
 // We need to distinguish clang as the CUDA compiler from clang as the host compiler,
 // invoked by NVCC (e.g. on MacOS). The former needs to see both host and device implementation
 // of the functions, while the latter can only deal with one of them.
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
 namespace {
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,

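A note on the guard change above (my reading of the change, not part of the commit message): EIGEN_HIP_DEVICE_COMPILE is only defined during hipcc's device-side pass, whereas EIGEN_HIPCC is defined whenever hipcc is the compiler, in both the host and the device pass. Like clang acting as the CUDA compiler, hipcc parses host and device code in a single front end, so these helper functions must be visible in both passes; hence the switch to EIGEN_HIPCC. A rough sketch of the distinction, using made-up macro names rather than Eigen's own definitions:

    // Hypothetical illustration only; not Eigen's actual macro setup.
    #if defined(__HIPCC__)                 // hipcc front end: host pass and device pass
      #define SKETCH_HIPCC 1
    #endif
    #if defined(__HIP_DEVICE_COMPILE__)    // defined only during the device pass
      #define SKETCH_HIP_DEVICE_COMPILE 1
    #endif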

@@ -82,7 +82,8 @@ struct TensorBlockV2ResourceRequirements {
                : internal::kSkewedInnerDims;
   }
 
-  static TensorBlockV2ResourceRequirements
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements
   merge(const TensorBlockV2ResourceRequirements &lhs,
         const TensorBlockV2ResourceRequirements &rhs) {
     return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
@@ -91,19 +92,22 @@ struct TensorBlockV2ResourceRequirements {
   // This is a resource requirement that should be returned from expressions
   // that do not have any block evaluation preference (e.g. default tensor
   // expression with raw buffer access).
-  static TensorBlockV2ResourceRequirements any() {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE TensorBlockV2ResourceRequirements any() {
     return {TensorBlockV2ShapeType::kUniformAllDims, 1};
   }
 
  private:
   using Requirements = TensorBlockV2ResourceRequirements;
 
-  static size_t merge(size_t lhs_size, size_t rhs_size) {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE size_t merge(size_t lhs_size, size_t rhs_size) {
     return numext::maxi(lhs_size, rhs_size);
   }
 
-  static TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
-                                      TensorBlockV2ShapeType rhs) {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE TensorBlockV2ShapeType merge(TensorBlockV2ShapeType lhs,
+                                                          TensorBlockV2ShapeType rhs) {
     return (lhs == TensorBlockV2ShapeType::kSkewedInnerDims ||
             rhs == TensorBlockV2ShapeType::kSkewedInnerDims)
                ? TensorBlockV2ShapeType::kSkewedInnerDims