diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 28986d9a2..70b90fbdb 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -70,7 +70,7 @@ struct ei_traits > MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime, Flags = ((int(LhsFlags) | int(RhsFlags)) & ( HereditaryBits - | Like1DArrayBit + | int(LhsFlags) & int(RhsFlags) & Like1DArrayBit | (ei_functor_traits::IsVectorizable && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit)) ? int(LhsFlags) & int(RhsFlags) & VectorizableBit : 0))), CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits::Cost diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 8ec1452e1..527d85b12 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -62,6 +62,9 @@ template clas inline Flagged(const ExpressionType& matrix) : m_expression(matrix) {} + /** \internal */ + inline ExpressionType _expression() const { return m_expression; } + private: inline int _rows() const { return m_expression.rows(); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 101571ea1..50c4edfc8 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -163,7 +163,7 @@ template class MatrixBase * \sa rows(), cols(), IsVectorAtCompileTime. */ inline bool isVector() const { return rows()==1 || cols()==1; } //@} - + /// \name Default return types //@{ /** Represents a constant matrix */ @@ -255,6 +255,9 @@ template class MatrixBase template Derived& operator-=(const MatrixBase& other); + template + Derived& operator+=(const Flagged, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other); + Derived& operator*=(const Scalar& other); Derived& operator/=(const Scalar& other); @@ -407,7 +410,7 @@ template class MatrixBase bool isOrtho(const MatrixBase& other, RealScalar prec = precision()) const; bool isOrtho(RealScalar prec = precision()) const; - + template inline bool operator==(const MatrixBase& other) const { return derived().cwiseEqualTo(other.derived()).all(); } @@ -583,8 +586,8 @@ template class MatrixBase //@{ const QR::type> qr() const; //@} - - + + #ifdef EIGEN_MATRIX_CUSTOM_ADDONS_FILE #include EIGEN_MATRIX_CUSTOM_ADDONS_FILE #endif diff --git a/Eigen/src/Core/ProductWIP.h b/Eigen/src/Core/ProductWIP.h index 7cc913d72..6815b28ab 100644 --- a/Eigen/src/Core/ProductWIP.h +++ b/Eigen/src/Core/ProductWIP.h @@ -183,12 +183,12 @@ template struct ei_product_nested_rhs template struct ei_product_nested_lhs { typedef typename ei_meta_if< - ei_is_temporary::ret && !(ei_traits::Flags & RowMajorBit), + ei_is_temporary::ret, T, - typename ei_meta_if<( - (ei_traits::Flags & EvalBeforeNestingBit) - || (!(ei_traits::Flags & DirectAccessBit)) - || (n+1) * (NumTraits::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost), + typename ei_meta_if< + int(ei_traits::Flags) & EvalBeforeNestingBit + || (!(int(ei_traits::Flags) & DirectAccessBit)) + || (n+1) * int(NumTraits::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost), typename ei_eval::type, const T& >::ret @@ -209,8 +209,8 @@ struct ei_traits > typedef typename ei_meta_if::type, typename ei_nested::type>::ret RhsNested; - typedef typename ei_unref::type _LhsNested; - typedef typename ei_unref::type _RhsNested; + typedef typename ei_unconst::type>::type _LhsNested; + typedef typename ei_unconst::type>::type _RhsNested; enum { LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, @@ -224,7 +224,7 @@ struct ei_traits > // the other one is always vectorized ! _RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits::size == 0), _LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits::size == 0), - _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 0 : 0, + _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0, _RowMajor = (RhsFlags & RowMajorBit) && (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)), _LostBits = HereditaryBits & ~( @@ -271,6 +271,10 @@ template class Product : ei_no_assignm template void _cacheFriendlyEval(DestDerived& res) const; + /** \internal */ + template + void _cacheFriendlyEvalAndAdd(DestDerived& res) const; + private: inline int _rows() const { return m_lhs.rows(); } @@ -363,6 +367,16 @@ MatrixBase::operator*=(const MatrixBase &other) return *this = *this * other; } +/** \internal */ +template +template +inline Derived& +MatrixBase::operator+=(const Flagged, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) +{ + other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived()); + return derived(); +} + template template inline Derived& MatrixBase::lazyAssign(const Product& product) @@ -375,16 +389,50 @@ template template inline void Product::_cacheFriendlyEval(DestDerived& res) const { - #ifndef EIGEN_WIP_PRODUCT_DIRTY - res.setZero(); - #endif + if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ) + { + #ifndef EIGEN_WIP_PRODUCT_DIRTY + res.setZero(); + #endif - ei_cache_friendly_product( - _rows(), _cols(), m_lhs.cols(), - _LhsNested::Flags&RowMajorBit, &(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(), - _RhsNested::Flags&RowMajorBit, &(m_rhs.const_cast_derived().coeffRef(0,0)), m_rhs.stride(), - Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() - ); + ei_cache_friendly_product( + _rows(), _cols(), m_lhs.cols(), + _LhsNested::Flags&RowMajorBit, &(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(), + _RhsNested::Flags&RowMajorBit, &(m_rhs.const_cast_derived().coeffRef(0,0)), m_rhs.stride(), + Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); + } + else + { + // lazy product + res = Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy(); + } +} + +template +template +inline void Product::_cacheFriendlyEvalAndAdd(DestDerived& res) const +{ + if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ) + { + ei_cache_friendly_product( + _rows(), _cols(), m_lhs.cols(), + _LhsNested::Flags&RowMajorBit, &(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(), + _RhsNested::Flags&RowMajorBit, &(m_rhs.const_cast_derived().coeffRef(0,0)), m_rhs.stride(), + Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); + } + else + { + // lazy product + res += Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy(); + } } #endif // EIGEN_PRODUCT_H diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index e3cf3ba6b..8d9e4c00a 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -154,7 +154,7 @@ class ei_corrected_matrix_flags enum { is_vectorizable = ei_packet_traits::size > 1 && (Size%ei_packet_traits::size==0), - _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit + _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit | DirectAccessBit }; public: