diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 56f4c956e..bf5ede1bc 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -208,12 +208,12 @@ struct ei_assignment_impl for ( ; index::size) { // FIXME the following is not really efficient - int i = index/dst.rows(); - int j = index%dst.rows(); + int i = index/dst.cols(); + int j = index%dst.cols(); dst.template writePacketCoeff(i, j, src.template packetCoeff(i, j)); } - for(int i = alignedSize/dst.rows(); i < dst.rows(); i++) - for(int j = alignedSize%dst.rows(); j < dst.cols(); j++) + for(int i = alignedSize/dst.cols(); i < dst.rows(); i++) + for(int j = alignedSize%dst.cols(); j < dst.cols(); j++) dst.coeffRef(i, j) = src.coeff(i, j); } else diff --git a/Eigen/src/Core/CacheFriendlyProduct.h b/Eigen/src/Core/CacheFriendlyProduct.h index b484b1786..5e5d075b1 100644 --- a/Eigen/src/Core/CacheFriendlyProduct.h +++ b/Eigen/src/Core/CacheFriendlyProduct.h @@ -45,7 +45,7 @@ static void ei_cache_friendly_product( rhsStride = _lhsStride; cols = _rows; rows = _cols; - lhsRowMajor = _rhsRowMajor; + lhsRowMajor = !_rhsRowMajor; ei_assert(_lhsRowMajor); } else diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h index 1107e39aa..925066533 100644 --- a/Eigen/src/Core/Flagged.h +++ b/Eigen/src/Core/Flagged.h @@ -43,6 +43,7 @@ template struct ei_traits > { typedef typename ExpressionType::Scalar Scalar; + enum { RowsAtCompileTime = ExpressionType::RowsAtCompileTime, ColsAtCompileTime = ExpressionType::ColsAtCompileTime, @@ -59,11 +60,13 @@ template clas public: EIGEN_GENERIC_PUBLIC_INTERFACE(Flagged) + typedef typename ei_meta_if::ret, + ExpressionType, const ExpressionType&>::ret ExpressionTypeNested; inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} /** \internal */ - inline ExpressionType _expression() const { return m_matrix; } + inline const ExpressionType& _expression() const { return m_matrix; } private: @@ -94,7 +97,7 @@ template clas } protected: - typename ExpressionType::Nested m_matrix; + ExpressionTypeNested m_matrix; }; /** \returns an expression of *this with added flags diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index cd7d9ca93..2c8c73c88 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -165,12 +165,10 @@ template class ei_product_eval_to_column_major template struct ei_product_nested_rhs { typedef typename ei_meta_if< - ei_must_nest_by_value::ret && (!(ei_traits::Flags & RowMajorBit)) && (int(ei_traits::Flags) & DirectAccessBit), + ei_must_nest_by_value::ret, T, typename ei_meta_if< ((ei_traits::Flags & EvalBeforeNestingBit) - || (ei_traits::Flags & RowMajorBit) - || (!(ei_traits::Flags & DirectAccessBit)) || (n+1) * (NumTraits::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost), typename ei_product_eval_to_column_major::type, const T& @@ -178,19 +176,38 @@ template struct ei_product_nested_rhs >::ret type; }; -template struct ei_product_nested_lhs +// template struct ei_product_nested_lhs +// { +// typedef typename ei_meta_if< +// ei_must_nest_by_value::ret && (int(ei_traits::Flags) & DirectAccessBit), +// T, +// typename ei_meta_if< +// int(ei_traits::Flags) & EvalBeforeNestingBit +// || (!(int(ei_traits::Flags) & DirectAccessBit)) +// || (n+1) * int(NumTraits::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost), +// typename ei_eval::type, +// const T& +// >::ret +// >::ret type; +// }; + +template struct ei_product_copy_rhs { typedef typename ei_meta_if< - ei_must_nest_by_value::ret && (int(ei_traits::Flags) & DirectAccessBit), - T, - typename ei_meta_if< - int(ei_traits::Flags) & EvalBeforeNestingBit - || (!(int(ei_traits::Flags) & DirectAccessBit)) - || (n+1) * int(NumTraits::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost), + (ei_traits::Flags & RowMajorBit) + || (!(ei_traits::Flags & DirectAccessBit)), + typename ei_product_eval_to_column_major::type, + const T& + >::ret type; +}; + +template struct ei_product_copy_lhs +{ + typedef typename ei_meta_if< + (!(int(ei_traits::Flags) & DirectAccessBit)), typename ei_eval::type, const T& - >::ret - >::ret type; + >::ret type; }; template @@ -199,9 +216,9 @@ struct ei_traits > typedef typename Lhs::Scalar Scalar; // the cache friendly product evals lhs once only // FIXME what to do if we chose to dynamically call the normal product from the cache friendly one for small matrices ? - typedef typename ei_meta_if::type, - typename ei_nested::type>::ret LhsNested; + typedef /*typename ei_meta_if::type, + typename ei_nested::type/*>::ret*/ LhsNested; // NOTE that rhs must be ColumnMajor, so we might need a special nested type calculation typedef typename ei_meta_if > _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0, _RowMajor = (RhsFlags & RowMajorBit) && (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)), - _LostBits = HereditaryBits & ~( - (_RowMajor ? 0 : RowMajorBit) - | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)), - Flags = ((unsigned int)(LhsFlags | RhsFlags) & _LostBits & ~NestedByValue) + _LostBits = ~((_RowMajor ? 0 : RowMajorBit) + | ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)), + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & _LostBits) | EvalBeforeAssigningBit | EvalBeforeNestingBit | (_Vectorizable ? VectorizableBit : 0), @@ -369,6 +385,7 @@ template inline Derived& MatrixBase::operator+=(const Flagged, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other) { + std::cout << "_cacheFriendlyEvalAndAdd\n"; other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived()); return derived(); } @@ -396,6 +413,7 @@ struct ei_cache_friendly_selector ) { res.setZero(); +// typename ei_product_copy_lhs<>::type ei_cache_friendly_product( product._rows(), product._cols(), product.m_lhs.cols(), _LhsNested::Flags&RowMajorBit, &(product.m_lhs.const_cast_derived().coeffRef(0,0)), product.m_lhs.stride(), @@ -452,18 +470,70 @@ template template inline void Product::_cacheFriendlyEval(DestDerived& res) const { - ei_cache_friendly_selector - ::eval(*this, res); +// ei_cache_friendly_selector +// ::eval(*this, res); + + if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ) + { + res.setZero(); + + +// typedef typename ei_eval<_LhsNested>::type LhsCopy; +// typedef typename ei_product_eval_to_column_major<_RhsNested>::type RhsCopy; + typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy; + typedef typename ei_unref::type _LhsCopy; + typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy; + typedef typename ei_unref::type _RhsCopy; + LhsCopy lhs(m_lhs); + RhsCopy rhs(m_rhs); + ei_cache_friendly_product( + _rows(), _cols(), lhs.cols(), + _LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(), + _RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(), + Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); + } + else + { + res = Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy(); + } + } template template inline void Product::_cacheFriendlyEvalAndAdd(DestDerived& res) const { - ei_cache_friendly_selector - ::eval_and_add(*this, res); + std::cout << "_cacheFriendlyEvalAndAdd\n"; +// ei_cache_friendly_selector +// ::eval_and_add(*this, res); + if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + ) + { + typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy; + typedef typename ei_unref::type _LhsCopy; + typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy; + typedef typename ei_unref::type _RhsCopy; + LhsCopy lhs(m_lhs); + RhsCopy rhs(m_rhs); + ei_cache_friendly_product( + _rows(), _cols(), lhs.cols(), + _LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(), + _RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(), + Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride() + ); + } + else + { + res += Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy(); + } } #endif // EIGEN_PRODUCT_H