merge

2025-09-24 07:13:16 +08:00 · 2018-10-08 17:35:18 +02:00 · 2018-10-08 17:35:18 +02:00 · 649d4758a6
commit 649d4758a6
parent aa5820056e e29bfe8479
23 changed files with 473 additions and 345 deletions
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@ -420,7 +420,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
      if(size != m_rows*m_cols)
      {
        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
-        if (size)
+        if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        else
          m_data = 0;
@ -497,7 +497,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
      if(size != _Rows*m_cols)
      {
        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
-        if (size)
+        if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        else
          m_data = 0;
@ -573,7 +573,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
      if(size != m_rows*_Cols)
      {
        internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
-        if (size)
+        if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        else
          m_data = 0;
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@ -1217,7 +1217,8 @@ inline int log2(int x)

 /** \returns the square root of \a x.
  *
-  * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+  * It is essentially equivalent to
+  * \code using std::sqrt; return sqrt(x); \endcode
  * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
  * specializations when SSE is enabled.
  *
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@ -28,7 +28,7 @@ namespace internal {
 #endif
 #endif

-#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
+#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
 // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
 // have overloads for both types without linking error.
 // One solution is to increase ABI version using -fabi-version=4 (or greater).
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h
@ -379,10 +379,12 @@
  #include <cuda_fp16.h>
 #endif

-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
+#if defined(EIGEN_HIPCC)
  #define EIGEN_VECTORIZE_GPU
  #include <hip/hip_vector_types.h>
+#endif
+
+#if defined(EIGEN_HIP_DEVICE_COMPILE)

  #define EIGEN_HAS_HIP_FP16
  #include <hip/hip_fp16.h>
--- a/Eigen/src/Core/util/IntegralConstant.h
+++ b/Eigen/src/Core/util/IntegralConstant.h
@ -55,7 +55,9 @@ public:
  operator int() const { return value; }
  FixedInt() {}
  FixedInt( VariableAndFixedInt<N> other) {
-    EIGEN_ONLY_USED_FOR_DEBUG(other);
+    #ifndef EIGEN_INTERNAL_DEBUGGING
+    EIGEN_UNUSED_VARIABLE(other);
+    #endif
    eigen_internal_assert(int(other)==N);
  }

--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@ -96,10 +96,16 @@ inline void throw_std_bad_alloc()
 /** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
  * Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
  */
-inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
+EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
 {
  eigen_assert(alignment >= sizeof(void*) && (alignment & -alignment) == alignment && "Alignment must be at least sizeof(void*) and a power of 2");
+  
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+  void *original = ::malloc(size+alignment);
+#else
  void *original = std::malloc(size+alignment);
+#endif
+  
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
@ -107,9 +113,15 @@ inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = E
 }

 /** \internal Frees memory allocated with handmade_aligned_malloc */
-inline void handmade_aligned_free(void *ptr)
+EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
 {
-  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+  if (ptr) {
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+    ::free(*(reinterpret_cast<void**>(ptr) - 1));
+#else
+    std::free(*(reinterpret_cast<void**>(ptr) - 1));
+#endif
+  }
 }

 /** \internal
@ -872,6 +884,15 @@ public:

  ~aligned_allocator() {}

+  #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
+  // In gcc, std::allocator::max_size() is buggy, which makes gcc trigger a warning:
+  // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
+  // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
+  size_type max_size() const {
+    return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
+  }
+  #endif
+
  pointer allocate(size_type num, const void* /*hint*/ = 0)
  {
    internal::check_size_for_overflow<T>(num);
--- a/Eigen/src/plugins/ReshapedMethods.h
+++ b/Eigen/src/plugins/ReshapedMethods.h
@ -105,7 +105,7 @@ EIGEN_DEVICE_FUNC
 inline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,
                internal::get_compiletime_reshape_size<NRowsType,NColsType,SizeAtCompileTime>::value,
                internal::get_compiletime_reshape_size<NColsType,NRowsType,SizeAtCompileTime>::value,
-                Order==AutoOrder?Flags&RowMajorBit:Order>
+                (Order==AutoOrder?Flags&RowMajorBit:Order)>
 reshaped(NRowsType nRows, NColsType nCols) EIGEN_RESHAPED_METHOD_CONST
 {
  return Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,
@ -128,7 +128,7 @@ reshaped() EIGEN_RESHAPED_METHOD_CONST

 template<int Order>
 EIGEN_DEVICE_FUNC
-inline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived, SizeAtCompileTime, 1, Order==AutoOrder?Flags&RowMajorBit:Order>
+inline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived, SizeAtCompileTime, 1, (Order==AutoOrder?Flags&RowMajorBit:Order)>
 reshaped() EIGEN_RESHAPED_METHOD_CONST
 {
  EIGEN_STATIC_ASSERT(Order==RowMajor || Order==ColMajor || Order==AutoOrder, INVALID_TEMPLATE_PARAMETER);
--- a/test/indexed_view.cpp
+++ b/test/indexed_view.cpp
@ -15,6 +15,14 @@
 #ifdef EIGEN_TEST_PART_3
 // Make sure we also check c++98 max implementation
 #define EIGEN_MAX_CPP_VER 03
+
+// We need to disable this warning when compiling with c++11 while limiting Eigen to c++98
+// Ideally we would rather configure the compiler to build in c++98 mode but this needs
+// to be done at the CMakeLists.txt level.
+#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+  #pragma GCC diagnostic ignored "-Wdeprecated"
+#endif
+
 #endif

 #include <valarray>
--- a/test/ref.cpp
+++ b/test/ref.cpp
@ -255,8 +255,8 @@ void test_ref_overloads()

 void test_ref_fixed_size_assert()
 {
-  Vector4f v4;
-  VectorXf vx(10);
+  Vector4f v4 = Vector4f::Random();
+  VectorXf vx = VectorXf::Random(10);
  VERIFY_RAISES_STATIC_ASSERT( Ref<Vector3f> y = v4; (void)y; );
  VERIFY_RAISES_STATIC_ASSERT( Ref<Vector3f> y = vx.head<4>(); (void)y; );
  VERIFY_RAISES_STATIC_ASSERT( Ref<const Vector3f> y = v4; (void)y; );
--- a/test/stddeque.cpp
+++ b/test/stddeque.cpp
@ -18,7 +18,7 @@ void check_stddeque_matrix(const MatrixType& m)
  Index rows = m.rows();
  Index cols = m.cols();
  MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
-  std::deque<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType(rows,cols)), w(20, y);
+  std::deque<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType::Zero(rows,cols)), w(20, y);
  v.front() = x;
  w.front() = w.back();
  VERIFY_IS_APPROX(w.front(), w.back());
@ -33,7 +33,7 @@ void check_stddeque_matrix(const MatrixType& m)
    ++wi;
  }

-  v.resize(21);  
+  v.resize(21,MatrixType::Zero(rows,cols));  
  v.back() = x;
  VERIFY_IS_APPROX(v.back(), x);
  v.resize(22,y);
@ -46,8 +46,8 @@ template<typename TransformType>
 void check_stddeque_transform(const TransformType&)
 {
  typedef typename TransformType::MatrixType MatrixType;
-  TransformType x(MatrixType::Random()), y(MatrixType::Random());
-  std::deque<TransformType,Eigen::aligned_allocator<TransformType> > v(10), w(20, y);
+  TransformType x(MatrixType::Random()), y(MatrixType::Random()), ti=TransformType::Identity();
+  std::deque<TransformType,Eigen::aligned_allocator<TransformType> > v(10,ti), w(20, y);
  v.front() = x;
  w.front() = w.back();
  VERIFY_IS_APPROX(w.front(), w.back());
@ -62,7 +62,7 @@ void check_stddeque_transform(const TransformType&)
    ++wi;
  }

-  v.resize(21);
+  v.resize(21,ti);
  v.back() = x;
  VERIFY_IS_APPROX(v.back(), x);
  v.resize(22,y);
@ -75,8 +75,8 @@ template<typename QuaternionType>
 void check_stddeque_quaternion(const QuaternionType&)
 {
  typedef typename QuaternionType::Coefficients Coefficients;
-  QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
-  std::deque<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10), w(20, y);
+  QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
+  std::deque<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10,qi), w(20, y);
  v.front() = x;
  w.front() = w.back();
  VERIFY_IS_APPROX(w.front(), w.back());
@ -91,7 +91,7 @@ void check_stddeque_quaternion(const QuaternionType&)
    ++wi;
  }

-  v.resize(21);
+  v.resize(21,qi);
  v.back() = x;
  VERIFY_IS_APPROX(v.back(), x);
  v.resize(22,y);
--- a/test/stddeque_overload.cpp
+++ b/test/stddeque_overload.cpp
@ -31,7 +31,7 @@ void check_stddeque_matrix(const MatrixType& m)
  Index rows = m.rows();
  Index cols = m.cols();
  MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
-  std::deque<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
+  std::deque<MatrixType> v(10, MatrixType::Zero(rows,cols)), w(20, y);
  v[5] = x;
  w[6] = v[5];
  VERIFY_IS_APPROX(w[6], v[5]);
@ -64,8 +64,8 @@ template<typename TransformType>
 void check_stddeque_transform(const TransformType&)
 {
  typedef typename TransformType::MatrixType MatrixType;
-  TransformType x(MatrixType::Random()), y(MatrixType::Random());
-  std::deque<TransformType> v(10), w(20, y);
+  TransformType x(MatrixType::Random()), y(MatrixType::Random()), ti=TransformType::Identity();
+  std::deque<TransformType> v(10,ti), w(20, y);
  v[5] = x;
  w[6] = v[5];
  VERIFY_IS_APPROX(w[6], v[5]);
@ -75,7 +75,7 @@ void check_stddeque_transform(const TransformType&)
    VERIFY_IS_APPROX(w[i], v[i]);
  }

-  v.resize(21);
+  v.resize(21,ti);
  v[20] = x;
  VERIFY_IS_APPROX(v[20], x);
  v.resize(22,y);
@ -98,8 +98,8 @@ template<typename QuaternionType>
 void check_stddeque_quaternion(const QuaternionType&)
 {
  typedef typename QuaternionType::Coefficients Coefficients;
-  QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
-  std::deque<QuaternionType> v(10), w(20, y);
+  QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
+  std::deque<QuaternionType> v(10,qi), w(20, y);
  v[5] = x;
  w[6] = v[5];
  VERIFY_IS_APPROX(w[6], v[5]);
@ -109,7 +109,7 @@ void check_stddeque_quaternion(const QuaternionType&)
    VERIFY_IS_APPROX(w[i], v[i]);
  }

-  v.resize(21);
+  v.resize(21,qi);
  v[20] = x;
  VERIFY_IS_APPROX(v[20], x);
  v.resize(22,y);
--- a/test/stdlist.cpp
+++ b/test/stdlist.cpp
@ -18,7 +18,7 @@ void check_stdlist_matrix(const MatrixType& m)
  Index rows = m.rows();
  Index cols = m.cols();
  MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
-  std::list<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType(rows,cols)), w(20, y);
+  std::list<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType::Zero(rows,cols)), w(20, y);
  v.front() = x;
  w.front() = w.back();
  VERIFY_IS_APPROX(w.front(), w.back());
@ -33,7 +33,7 @@ void check_stdlist_matrix(const MatrixType& m)
    ++wi;
  }

-  v.resize(21);  
+  v.resize(21, MatrixType::Zero(rows,cols));  
  v.back() = x;
  VERIFY_IS_APPROX(v.back(), x);
  v.resize(22,y);
@ -46,8 +46,8 @@ template<typename TransformType>
 void check_stdlist_transform(const TransformType&)
 {
  typedef typename TransformType::MatrixType MatrixType;
-  TransformType x(MatrixType::Random()), y(MatrixType::Random());
-  std::list<TransformType,Eigen::aligned_allocator<TransformType> > v(10), w(20, y);
+  TransformType x(MatrixType::Random()), y(MatrixType::Random()), ti=TransformType::Identity();
+  std::list<TransformType,Eigen::aligned_allocator<TransformType> > v(10,ti), w(20, y);
  v.front() = x;
  w.front() = w.back();
  VERIFY_IS_APPROX(w.front(), w.back());
@ -62,7 +62,7 @@ void check_stdlist_transform(const TransformType&)
    ++wi;
  }

-  v.resize(21);
+  v.resize(21, ti);
  v.back() = x;
  VERIFY_IS_APPROX(v.back(), x);
  v.resize(22,y);
@ -75,8 +75,8 @@ template<typename QuaternionType>
 void check_stdlist_quaternion(const QuaternionType&)
 {
  typedef typename QuaternionType::Coefficients Coefficients;
-  QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
-  std::list<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10), w(20, y);
+  QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
+  std::list<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10,qi), w(20, y);
  v.front() = x;
  w.front() = w.back();
  VERIFY_IS_APPROX(w.front(), w.back());
@ -91,7 +91,7 @@ void check_stdlist_quaternion(const QuaternionType&)
    ++wi;
  }

-  v.resize(21);
+  v.resize(21,qi);
  v.back() = x;
  VERIFY_IS_APPROX(v.back(), x);
  v.resize(22,y);
--- a/test/stdlist_overload.cpp
+++ b/test/stdlist_overload.cpp
@ -47,7 +47,7 @@ void check_stdlist_matrix(const MatrixType& m)
  Index rows = m.rows();
  Index cols = m.cols();
  MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
-  std::list<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
+  std::list<MatrixType> v(10, MatrixType::Zero(rows,cols)), w(20, y);
  typename std::list<MatrixType>::iterator itv = get(v, 5);
  typename std::list<MatrixType>::iterator itw = get(w, 6);
  *itv = x;
@ -86,8 +86,8 @@ template<typename TransformType>
 void check_stdlist_transform(const TransformType&)
 {
  typedef typename TransformType::MatrixType MatrixType;
-  TransformType x(MatrixType::Random()), y(MatrixType::Random());
-  std::list<TransformType> v(10), w(20, y);
+  TransformType x(MatrixType::Random()), y(MatrixType::Random()), ti=TransformType::Identity();
+  std::list<TransformType> v(10,ti), w(20, y);
  typename std::list<TransformType>::iterator itv = get(v, 5);
  typename std::list<TransformType>::iterator itw = get(w, 6);
  *itv = x;
@ -103,7 +103,7 @@ void check_stdlist_transform(const TransformType&)
    ++itw;
  }

-  v.resize(21);
+  v.resize(21, ti);
  set(v, 20, x);
  VERIFY_IS_APPROX(*get(v, 20), x);
  v.resize(22,y);
@ -126,8 +126,8 @@ template<typename QuaternionType>
 void check_stdlist_quaternion(const QuaternionType&)
 {
  typedef typename QuaternionType::Coefficients Coefficients;
-  QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
-  std::list<QuaternionType> v(10), w(20, y);
+  QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
+  std::list<QuaternionType> v(10,qi), w(20, y);
  typename std::list<QuaternionType>::iterator itv = get(v, 5);
  typename std::list<QuaternionType>::iterator itw = get(w, 6);
  *itv = x;
@ -143,7 +143,7 @@ void check_stdlist_quaternion(const QuaternionType&)
    ++itw;
  }

-  v.resize(21);
+  v.resize(21,qi);
  set(v, 20, x);
  VERIFY_IS_APPROX(*get(v, 20), x);
  v.resize(22,y);
--- a/test/stdvector.cpp
+++ b/test/stdvector.cpp
@ -17,7 +17,7 @@ void check_stdvector_matrix(const MatrixType& m)
  Index rows = m.rows();
  Index cols = m.cols();
  MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
-  std::vector<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType(rows,cols)), w(20, y);
+  std::vector<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType::Zero(rows,cols)), w(20, y);
  v[5] = x;
  w[6] = v[5];
  VERIFY_IS_APPROX(w[6], v[5]);
@ -86,8 +86,8 @@ template<typename QuaternionType>
 void check_stdvector_quaternion(const QuaternionType&)
 {
  typedef typename QuaternionType::Coefficients Coefficients;
-  QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
-  std::vector<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10), w(20, y);
+  QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
+  std::vector<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10,qi), w(20, y);
  v[5] = x;
  w[6] = v[5];
  VERIFY_IS_APPROX(w[6], v[5]);
@ -117,6 +117,16 @@ void check_stdvector_quaternion(const QuaternionType&)
  }
 }

+// The code below triggered a spurious warning with gcc >= 7:
+// eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
+// This has been reported to gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
+void std_vector_gcc_warning()
+{
+  typedef Eigen::Vector3f T;
+  std::vector<T, Eigen::aligned_allocator<T> > v;
+  v.push_back(T());
+}
+
 EIGEN_DECLARE_TEST(stdvector)
 {
  // some non vectorizable fixed sizes
--- a/test/stdvector_overload.cpp
+++ b/test/stdvector_overload.cpp
@ -31,7 +31,7 @@ void check_stdvector_matrix(const MatrixType& m)
  Index rows = m.rows();
  Index cols = m.cols();
  MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
-  std::vector<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
+  std::vector<MatrixType> v(10, MatrixType::Zero(rows,cols)), w(20, y);
  v[5] = x;
  w[6] = v[5];
  VERIFY_IS_APPROX(w[6], v[5]);
@ -100,8 +100,8 @@ template<typename QuaternionType>
 void check_stdvector_quaternion(const QuaternionType&)
 {
  typedef typename QuaternionType::Coefficients Coefficients;
-  QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
-  std::vector<QuaternionType> v(10), w(20, y);
+  QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
+  std::vector<QuaternionType> v(10,qi), w(20, y);
  v[5] = x;
  w[6] = v[5];
  VERIFY_IS_APPROX(w[6], v[5]);
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@ -186,21 +186,21 @@ struct TensorContractionKernel {
      /*ConjugateLhs*/ false, /*ConjugateRhs*/ false>
      GebpKernel;

-  EIGEN_DONT_INLINE
+  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
  static void packLhs(LhsScalar* lhsBlock,
                      const typename LhsMapper::SubMapper& data_mapper,
                      const StorageIndex depth, const StorageIndex rows) {
    LhsPacker()(lhsBlock, data_mapper, depth, rows, /*stride*/ 0, /*offset*/ 0);
  }

-  EIGEN_DONT_INLINE
+  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
  static void packRhs(RhsScalar* rhsBlock,
                      const typename RhsMapper::SubMapper& data_mapper,
                      const StorageIndex depth, const StorageIndex cols) {
    RhsPacker()(rhsBlock, data_mapper, depth, cols);
  }

-  EIGEN_DONT_INLINE
+  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
  static void invoke(const OutputMapper& output_mapper,
                     const LhsScalar* lhsBlock, const RhsScalar* rhsBlock,
                     const StorageIndex rows, const StorageIndex depth,
@ -667,8 +667,8 @@ struct TensorContractionEvaluatorBase
    this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
    this->template evalGemmPartial<lhs_inner_dim_contiguous,
                                   rhs_inner_dim_contiguous,
-                                   rhs_inner_dim_reordered, Alignment>(buffer,
-                                                                       0, k, 1);
+                                   rhs_inner_dim_reordered,
+                                   Alignment, true>(buffer, 0, k, 1);
  }

  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
@ -681,7 +681,7 @@ struct TensorContractionEvaluatorBase
                                     num_threads);
  }

-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment, bool use_output_kernel = true>
+  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment, bool use_output_kernel>
  EIGEN_DEVICE_FUNC void evalGemmPartial(Scalar* buffer, Index k_start, Index k_end, int num_threads) const {
    eigen_assert(k_end >= k_start && k_start >= 0 && k_end <= this->m_k_size);
    // columns in slice on left side, rows on right side
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@ -794,7 +794,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
    Index num_blocks = divup<Index>(k, block_size);
    // we use 'result' for the first block's partial result.
    MaxSizeVector<Scalar*> block_buffers(num_blocks - 1);
-    Barrier barrier(num_blocks);
+    Barrier barrier(internal::convert_index<int>(num_blocks));
    auto process_block = [=, &barrier](Scalar* buf, Index begin, Index end) {
      ::memset(buf, 0, m * n * sizeof(Scalar));
      TENSOR_CONTRACTION_DISPATCH(
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@ -195,6 +195,14 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
    m_impl.getResourceRequirements(resources);
  }

+  // required in block(OutputTensorBlock* output_block) const
+  // For C++03 compatibility this must be defined outside the method
+  struct BlockIteratorState {
+    Index stride;
+    Index span;
+    Index size;
+    Index count;
+  };
  // TODO(andydavis) Reduce the overhead of this function.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
      OutputTensorBlock* output_block) const {
@ -219,12 +227,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
    }

    // Initialize output block iterator state.
-    struct BlockIteratorState {
-      Index stride;
-      Index span;
-      Index size;
-      Index count;
-    };
    array<BlockIteratorState, NumOutputDims> block_iter_state;

    for (Index i = 0; i < NumOutputDims; ++i) {
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@ -218,6 +218,7 @@ struct InnerMostDimReducer<Self, Op, false, true> {
  }
 };

+#if !defined(EIGEN_HIPCC) 
 template <typename Self, typename Op>
 struct InnerMostDimReducer<Self, Op, true, true> {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
@ -257,7 +258,8 @@ struct InnerMostDimReducer<Self, Op, true, true> {
    }
  }
 };
-
+#endif
+ 
 template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
 struct InnerMostDimPreserver {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@ -292,7 +292,7 @@ __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input,
 }

 template <typename Op>
-__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) {
+__global__ void ReductionCleanupKernelHalfFloat(Op reducer, half* output, half2* scratch) {
  eigen_assert(threadIdx.x == 1);
  half tmp = __low2half(*scratch);
  reducer.reduce(__high2half(*scratch), &tmp);
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@ -124,7 +124,11 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
        m_stride = m_stride * dims[i];
      }
    } else {
-      for (int i = NumDims - 1; i > op.axis(); --i) {
+      // dims can only be indexed through unsigned integers,
+      // so let's use an unsigned type to let the compiler know.
+      // This prevents spurious warnings such as: "'*((void*)(& evaluator)+64)[18446744073709551615]' may be used uninitialized in this function"
+      unsigned int axis = internal::convert_index<unsigned int>(op.axis());
+      for (unsigned int i = NumDims - 1; i > axis; --i) {
        m_stride = m_stride * dims[i];
      }
    }
--- a/unsupported/test/cxx11_tensor_reduction.cpp
+++ b/unsupported/test/cxx11_tensor_reduction.cpp
@ -225,11 +225,11 @@ static void test_simple_reductions() {
    Tensor<int, 1> ints(10);
    std::iota(ints.data(), ints.data() + ints.dimension(0), 0);

-    TensorFixedSize<bool, Sizes<> > all;
-    all = ints.all();
-    VERIFY(!all());
-    all = (ints >= ints.constant(0)).all();
-    VERIFY(all());
+    TensorFixedSize<bool, Sizes<> > all_;
+    all_ = ints.all();
+    VERIFY(!all_());
+    all_ = (ints >= ints.constant(0)).all();
+    VERIFY(all_());

    TensorFixedSize<bool, Sizes<> > any;
    any = (ints > ints.constant(10)).any();
--- a/unsupported/test/mpreal/mpreal.h
+++ b/unsupported/test/mpreal/mpreal.h