From 008ff3483a8c5604639e1c4d204eae30ad737af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Mon, 7 Mar 2022 20:30:37 +0000 Subject: [PATCH] Fix broken tensor executor test, allow tensor packets of size 1. --- unsupported/Eigen/CXX11/src/Tensor/README.md | 5 -- .../CXX11/src/Tensor/TensorBroadcasting.h | 5 -- .../Eigen/CXX11/src/Tensor/TensorChipping.h | 3 - .../Eigen/CXX11/src/Tensor/TensorGenerator.h | 1 - .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 1 - .../Eigen/CXX11/src/Tensor/TensorPadding.h | 2 - .../Eigen/CXX11/src/Tensor/TensorPatch.h | 1 - .../Eigen/CXX11/src/Tensor/TensorReduction.h | 1 - .../Eigen/CXX11/src/Tensor/TensorReverse.h | 2 - .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 5 +- .../Eigen/CXX11/src/Tensor/TensorTrace.h | 2 - .../CXX11/src/Tensor/TensorVolumePatch.h | 1 - unsupported/test/cxx11_tensor_executor.cpp | 55 +++++++++---------- 13 files changed, 28 insertions(+), 56 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md index b6abf2f87..4c069e990 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -1847,8 +1847,3 @@ product of 2 1d tensors (through contractions) returns a 0d tensor. * The IndexList class requires a cxx11 compliant compiler. You can use an array of indices instead if you don't have access to a modern compiler. * On GPUs only floating point values are properly tested and optimized for. -* Complex and integer values are known to be broken on GPUs. If you try to use - them you'll most likely end up triggering a static assertion failure such as - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - - diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index ce43377ba..37bba2be2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -370,7 +370,6 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne (Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; @@ -416,7 +415,6 @@ struct TensorEvaluator, Device> // Concatenates m_broadcast[dim] copies, // [v0, ..., vN, v0, ..., vN, ... ] // with dim == NumDims - 1 for col-major, dim == 0 for row-major. - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); // Size of flattened tensor. @@ -445,7 +443,6 @@ struct TensorEvaluator, Device> // Interleaves m_broadcast[dim] copies, // [v0, v0, ..., v1, v1, ..., vN, vN, ... ] // with dim == 0 for col-major, dim == NumDims - 1 for row-major. - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index + PacketSize-1 < dimensions().TotalSize()); const Index M = (static_cast(Layout) == static_cast(ColMajor)) ? @@ -476,7 +473,6 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); const Index originalIndex = index; @@ -534,7 +530,6 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); const Index originalIndex = index; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index 1c7c46574..a2ffd3fb4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -219,7 +219,6 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); if (isInnerChipping()) { @@ -447,8 +446,6 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - if (this->isInnerChipping()) { // m_stride is equal to 1, so let's avoid the integer division. eigen_assert(this->m_stride == 1); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h index 8be5a8c0d..f367641df 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -155,7 +155,6 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { const int packetSize = PacketType::size; - EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+packetSize-1 < dimensions().TotalSize()); EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index 8305a74ea..cec95bb1e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -449,7 +449,6 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h index d111d80a3..de68013d3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -551,7 +551,6 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); const Index initialIndex = index; @@ -609,7 +608,6 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); const Index initialIndex = index; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h index 64965e871..f43c68c75 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -197,7 +197,6 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); Index output_stride_index = (static_cast(Layout) == static_cast(ColMajor)) ? NumDims - 1 : 0; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 8bbe49edc..eb44be266 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -837,7 +837,6 @@ static const bool RunningOnGPU = false; template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions()))); if (RunningOnGPU && m_result) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index f73578875..74d5726c3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -215,7 +215,6 @@ struct TensorEvaluator, Device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); // TODO(ndjaitly): write a better packing routine that uses @@ -448,7 +447,6 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); // This code is pilfered from TensorMorphing.h diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 487cb026e..3b68c403e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -227,8 +227,7 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + PacketSize - 1 < dimensions().TotalSize()); + eigen_assert(index + PacketSize - 1 < dimensions().TotalSize()); return PacketLoader::PacketAccess>::Run(*this, index); } @@ -399,8 +398,6 @@ struct TensorEvaluator, Device> template EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType& x) { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; internal::pstore(values, x); EIGEN_UNROLL_LOOP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h index 40bdc0a25..51409069d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h @@ -245,8 +245,6 @@ struct TensorEvaluator, Device> template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(index + PacketSize - 1 < dimensions().TotalSize()); EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h index df7f293eb..3f0b3c278 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h @@ -421,7 +421,6 @@ struct TensorEvaluator, D template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 || diff --git a/unsupported/test/cxx11_tensor_executor.cpp b/unsupported/test/cxx11_tensor_executor.cpp index 66b06e8ee..d0103dcbe 100644 --- a/unsupported/test/cxx11_tensor_executor.cpp +++ b/unsupported/test/cxx11_tensor_executor.cpp @@ -612,43 +612,42 @@ static void test_async_execute_binary_expr(Device d) } } -#ifdef EIGEN_DONT_VECTORIZE -#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL -#else -#define VECTORIZABLE(VAL) VAL +#ifndef EIGEN_DONT_VECTORIZE +#define EIGEN_DONT_VECTORIZE 0 #endif +#define VECTORIZABLE(T, VAL) !EIGEN_DONT_VECTORIZE && Eigen::internal::packet_traits::Vectorizable && VAL #define CALL_SUBTEST_PART(PART) \ CALL_SUBTEST_##PART #define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(default_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))) + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(default_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))) // NOTE: Currently only ThreadPoolDevice supports async expression evaluation. #define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ - CALL_SUBTEST_PART(PART)((NAME(tp_device))) + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))); \ + CALL_SUBTEST_PART(PART)((NAME(tp_device))) EIGEN_DECLARE_TEST(cxx11_tensor_executor) { Eigen::DefaultDevice default_device;