From 9d6d0dff8f0c1e8630996c3a4867ff0599566b33 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 1 Oct 2016 15:37:00 +0200 Subject: [PATCH] bug #1317: fix performance regression with some Block expressions and clang by helping it to remove dead code. The trick is to get rid of the nested expression in the evaluator by copying only the required information (here, the strides). --- Eigen/src/Core/CoreEvaluators.h | 52 ++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 7a5540593..00c079bd8 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -817,73 +817,79 @@ struct mapbase_evaluator : evaluator_base ColsAtCompileTime = XprType::ColsAtCompileTime, CoeffReadCost = NumTraits::ReadCost }; - + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) - : m_data(const_cast(map.data())), - m_xpr(map) + : m_data(const_cast(map.data())), + m_innerStride(map.innerStride()), + m_outerStride(map.outerStride()) { EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + return m_data[col * colStride() + row * rowStride()]; } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_data[index * m_xpr.innerStride()]; + return m_data[index * m_innerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; + return m_data[col * colStride() + row * rowStride()]; } - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_data[index * m_xpr.innerStride()]; + return m_data[index * m_innerStride.value()]; } - + template EIGEN_STRONG_INLINE - PacketType packet(Index row, Index col) const + PacketType packet(Index row, Index col) const { - PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + PointerType ptr = m_data + row * rowStride() + col * colStride(); return internal::ploadt(ptr); } template EIGEN_STRONG_INLINE - PacketType packet(Index index) const + PacketType packet(Index index) const { - return internal::ploadt(m_data + index * m_xpr.innerStride()); + return internal::ploadt(m_data + index * m_innerStride.value()); } - + template EIGEN_STRONG_INLINE - void writePacket(Index row, Index col, const PacketType& x) + void writePacket(Index row, Index col, const PacketType& x) { - PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); + PointerType ptr = m_data + row * rowStride() + col * colStride(); return internal::pstoret(ptr, x); } - + template EIGEN_STRONG_INLINE - void writePacket(Index index, const PacketType& x) + void writePacket(Index index, const PacketType& x) { - internal::pstoret(m_data + index * m_xpr.innerStride(), x); + internal::pstoret(m_data + index * m_innerStride.value(), x); } - protected: + EIGEN_DEVICE_FUNC + inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } + EIGEN_DEVICE_FUNC + inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } + PointerType m_data; - const XprType& m_xpr; + const internal::variable_if_dynamic m_innerStride; + const internal::variable_if_dynamic m_outerStride; }; template