From 4222550e173c344ef60e14cb2a18fc0bf8968a51 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 13 Jul 2018 16:12:38 -0700 Subject: [PATCH 1/2] Optimize the case where broadcasting is a no-op. --- .../CXX11/src/Tensor/TensorBroadcasting.h | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index b35b36475..3b934a5fd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -105,7 +105,7 @@ struct TensorEvaluator, Device> typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = internal::unpacket_traits::size; - bool nByOne = false, oneByN = false; + bool isCopy= false, nByOne = false, oneByN = false; enum { IsAligned = true, @@ -123,9 +123,13 @@ struct TensorEvaluator, Device> EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); const InputDimensions& input_dims = m_impl.dimensions(); const Broadcast& broadcast = op.broadcast(); + isCopy = true; for (int i = 0; i < NumDims; ++i) { eigen_assert(input_dims[i] > 0); - m_dimensions[i] = input_dims[i] * broadcast[i]; + m_dimensions[i] = input_dims[i] * m_broadcast[i]; + if (m_broadcast[i] != 1) { + isCopy = false; + } } if (static_cast(Layout) == static_cast(ColMajor)) { @@ -197,9 +201,17 @@ struct TensorEvaluator, Device> } if (static_cast(Layout) == static_cast(ColMajor)) { - return coeffColMajor(index); + if (isCopy) { + return m_impl.coeff(index); + } else { + return coeffColMajor(index); + } } else { - return coeffRowMajor(index); + if (isCopy) { + return m_impl.coeff(index); + } else { + return coeffRowMajor(index); + } } } @@ -272,7 +284,9 @@ struct TensorEvaluator, Device> } if (static_cast(Layout) == static_cast(ColMajor)) { - if (oneByN && !nByOne) { + if (isCopy) { + return m_impl.template packet(index); + } else if (oneByN && !nByOne) { return packetNByOne(index); } else if (!oneByN && nByOne) { return packetOneByN(index); @@ -282,7 +296,9 @@ struct TensorEvaluator, Device> return packetColMajor(index); } } else { - if (oneByN && !nByOne) { + if (isCopy) { + return m_impl.template packet(index); + } else if (oneByN && !nByOne) { return packetOneByN(index); } else if (!oneByN && nByOne) { return packetNByOne(index); @@ -516,7 +532,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { double compute_cost = TensorOpCost::AddCost(); - if (NumDims > 0) { + if (!isCopy && NumDims > 0) { for (int i = NumDims - 1; i > 0; --i) { compute_cost += TensorOpCost::DivCost(); if (internal::index_statically_eq(i, 1)) { From 3a9cf4e290c99588a22fcc9d288968f669809306 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 13 Jul 2018 16:24:48 -0700 Subject: [PATCH 2/2] Get rid of alias for m_broadcast. --- unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 3b934a5fd..278689915 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -122,7 +122,6 @@ struct TensorEvaluator, Device> // tensor with N >= 1 of 1 element first and then broadcast. EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); const InputDimensions& input_dims = m_impl.dimensions(); - const Broadcast& broadcast = op.broadcast(); isCopy = true; for (int i = 0; i < NumDims; ++i) { eigen_assert(input_dims[i] > 0); @@ -151,7 +150,7 @@ struct TensorEvaluator, Device> if (input_dims[0] == 1) { oneByN = true; for (int i = 1; i < NumDims; ++i) { - if (broadcast[i] != 1) { + if (m_broadcast[i] != 1) { oneByN = false; break; } @@ -159,7 +158,7 @@ struct TensorEvaluator, Device> } else if (input_dims[NumDims-1] == 1) { nByOne = true; for (int i = 0; i < NumDims-1; ++i) { - if (broadcast[i] != 1) { + if (m_broadcast[i] != 1) { nByOne = false; break; } @@ -173,7 +172,7 @@ struct TensorEvaluator, Device> nByOne = true; oneByN = true; for (int i = 1; i < NumDims-1; ++i) { - if (broadcast[i] != 1) { + if (m_broadcast[i] != 1) { nByOne = false; oneByN = false; break;