Improve the expression analyzer to bypass Transpose expressions

This commit is contained in:
Gael Guennebaud 2009-07-28 14:02:12 +02:00
parent 6713c75fac
commit 7ed7ec64b5
5 changed files with 76 additions and 61 deletions

View File

@ -79,8 +79,6 @@ struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct>
*/
template<typename Lhs, typename Rhs> struct ei_product_mode
{
typedef typename ei_blas_traits<Lhs>::ActualXprType ActualLhs;
typedef typename ei_blas_traits<Rhs>::ActualXprType ActualRhs;
enum{
// workaround sun studio:
LhsIsVectorAtCompileTime = ei_traits<Lhs>::ColsAtCompileTime==1 || ei_traits<Rhs>::ColsAtCompileTime==1,
@ -552,14 +550,14 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
typedef ei_blas_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits;
typedef ei_blas_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits;
typedef typename LhsProductTraits::ActualXprType ActualLhsType;
typedef typename RhsProductTraits::ActualXprType ActualRhsType;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename RhsProductTraits::ExtractType ActualRhsType;
template<typename DestDerived>
inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha)
{
const ActualLhsType& actualLhs = LhsProductTraits::extract(product.lhs());
const ActualRhsType& actualRhs = RhsProductTraits::extract(product.rhs());
ActualLhsType actualLhs = LhsProductTraits::extract(product.lhs());
ActualRhsType actualRhs = RhsProductTraits::extract(product.rhs());
Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(product.lhs())
* RhsProductTraits::extractScalarFactor(product.rhs());
@ -613,14 +611,14 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
typedef ei_blas_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits;
typedef ei_blas_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits;
typedef typename LhsProductTraits::ActualXprType ActualLhsType;
typedef typename RhsProductTraits::ActualXprType ActualRhsType;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename RhsProductTraits::ExtractType ActualRhsType;
template<typename DestDerived>
inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha)
{
const ActualLhsType& actualLhs = LhsProductTraits::extract(product.lhs());
const ActualRhsType& actualRhs = RhsProductTraits::extract(product.rhs());
ActualLhsType actualLhs = LhsProductTraits::extract(product.lhs());
ActualRhsType actualRhs = RhsProductTraits::extract(product.rhs());
Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(product.lhs())
* RhsProductTraits::extractScalarFactor(product.rhs());
@ -659,18 +657,19 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
typedef ei_blas_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits;
typedef ei_blas_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits;
typedef typename LhsProductTraits::ActualXprType ActualLhsType;
typedef typename RhsProductTraits::ActualXprType ActualRhsType;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename RhsProductTraits::ExtractType ActualRhsType;
typedef typename ei_cleantype<ActualRhsType>::type _ActualRhsType;
enum {
UseRhsDirectly = ((ei_packet_traits<Scalar>::size==1) || (ActualRhsType::Flags&ActualPacketAccessBit))
&& (!(ActualRhsType::Flags & RowMajorBit)) };
UseRhsDirectly = ((ei_packet_traits<Scalar>::size==1) || (_ActualRhsType::Flags&ActualPacketAccessBit))
&& (!(_ActualRhsType::Flags & RowMajorBit)) };
template<typename DestDerived>
inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha)
{
const ActualLhsType& actualLhs = LhsProductTraits::extract(product.lhs());
const ActualRhsType& actualRhs = RhsProductTraits::extract(product.rhs());
ActualLhsType actualLhs = LhsProductTraits::extract(product.lhs());
ActualRhsType actualRhs = RhsProductTraits::extract(product.rhs());
Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(product.lhs())
* RhsProductTraits::extractScalarFactor(product.rhs());
@ -681,7 +680,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
else
{
_rhs = ei_aligned_stack_new(Scalar, actualRhs.size());
Map<Matrix<Scalar,ActualRhsType::SizeAtCompileTime,1> >(_rhs, actualRhs.size()) = actualRhs;
Map<Matrix<Scalar,_ActualRhsType::SizeAtCompileTime,1> >(_rhs, actualRhs.size()) = actualRhs;
}
ei_cache_friendly_product_rowmajor_times_vector
@ -702,18 +701,19 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
typedef ei_blas_traits<typename ei_traits<ProductType>::_LhsNested> LhsProductTraits;
typedef ei_blas_traits<typename ei_traits<ProductType>::_RhsNested> RhsProductTraits;
typedef typename LhsProductTraits::ActualXprType ActualLhsType;
typedef typename RhsProductTraits::ActualXprType ActualRhsType;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef typename RhsProductTraits::ExtractType ActualRhsType;
typedef typename ei_cleantype<ActualLhsType>::type _ActualLhsType;
enum {
UseLhsDirectly = ((ei_packet_traits<Scalar>::size==1) || (ActualLhsType::Flags&ActualPacketAccessBit))
&& (ActualLhsType::Flags & RowMajorBit) };
UseLhsDirectly = ((ei_packet_traits<Scalar>::size==1) || (_ActualLhsType::Flags&ActualPacketAccessBit))
&& (_ActualLhsType::Flags & RowMajorBit) };
template<typename DestDerived>
inline static void run(DestDerived& res, const ProductType& product, typename ProductType::Scalar alpha)
{
const ActualLhsType& actualLhs = LhsProductTraits::extract(product.lhs());
const ActualRhsType& actualRhs = RhsProductTraits::extract(product.rhs());
ActualLhsType actualLhs = LhsProductTraits::extract(product.lhs());
ActualRhsType actualRhs = RhsProductTraits::extract(product.rhs());
Scalar actualAlpha = alpha * LhsProductTraits::extractScalarFactor(product.lhs())
* RhsProductTraits::extractScalarFactor(product.rhs());
@ -724,7 +724,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
else
{
_lhs = ei_aligned_stack_new(Scalar, actualLhs.size());
Map<Matrix<Scalar,ActualLhsType::SizeAtCompileTime,1> >(_lhs, actualLhs.size()) = actualLhs;
Map<Matrix<Scalar,_ActualLhsType::SizeAtCompileTime,1> >(_lhs, actualLhs.size()) = actualLhs;
}
ei_cache_friendly_product_rowmajor_times_vector

View File

@ -40,14 +40,14 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,RowMajor,1>
{
typedef typename Rhs::Scalar Scalar;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ActualXprType ActualLhsType;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
enum {
IsLowerTriangular = ((Mode&LowerTriangularBit)==LowerTriangularBit)
};
static void run(const Lhs& lhs, Rhs& other)
{
static const int PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
const ActualLhsType& actualLhs = LhsProductTraits::extract(lhs);
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
const int size = lhs.cols();
for(int pi=IsLowerTriangular ? 0 : size;
@ -94,7 +94,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,ColMajor,1>
typedef typename Rhs::Scalar Scalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ActualXprType ActualLhsType;
typedef typename LhsProductTraits::ExtractType ActualLhsType;
enum {
PacketSize = ei_packet_traits<Scalar>::size,
IsLowerTriangular = ((Mode&LowerTriangularBit)==LowerTriangularBit)
@ -103,7 +103,7 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,ColMajor,1>
static void run(const Lhs& lhs, Rhs& other)
{
static const int PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
const ActualLhsType& actualLhs = LhsProductTraits::extract(lhs);
ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
const int size = lhs.cols();
for(int pi=IsLowerTriangular ? 0 : size;
@ -151,10 +151,10 @@ struct ei_triangular_solver_selector<Lhs,Rhs,Mode,NoUnrolling,StorageOrder,RhsCo
{
typedef typename Rhs::Scalar Scalar;
typedef ei_blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ActualXprType ActualLhsType;
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
static void run(const Lhs& lhs, Rhs& rhs)
{
const ActualLhsType& actualLhs = LhsProductTraits::extract(lhs);
const ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
ei_triangular_solve_matrix<Scalar,StorageOrder,LhsProductTraits::NeedToConjugate,Rhs::Flags&RowMajorBit,Mode>
::run(lhs.rows(), rhs.cols(), &actualLhs.coeff(0,0), actualLhs.stride(), &rhs.coeffRef(0,0), rhs.stride());
}

View File

@ -116,6 +116,10 @@ template<typename MatrixType> class Transpose
m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x);
}
/** \internal used for introspection */
const typename ei_cleantype<typename MatrixType::Nested>::type&
_expression() const { return m_matrix; }
protected:
const typename MatrixType::Nested m_matrix;
};

View File

@ -344,13 +344,6 @@ struct ei_triangular_product_returntype<Mode,LhsIsTriangular,Lhs,false,Rhs,false
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename ei_cleantype<ActualRhsType>::type _ActualRhsType;
// enum {
// LhsUpLo = LhsMode&(UpperTriangularBit|LowerTriangularBit),
// LhsIsTriangular = (LhsMode&SelfAdjointBit)==SelfAdjointBit,
// RhsUpLo = RhsMode&(UpperTriangularBit|LowerTriangularBit),
// RhsIsSelfAdjoint = (RhsMode&SelfAdjointBit)==SelfAdjointBit
// };
template<typename Dest> inline void _addTo(Dest& dst) const
{ evalTo(dst,1); }
template<typename Dest> inline void _subTo(Dest& dst) const

View File

@ -95,7 +95,9 @@ template<> struct ei_conj_helper<true,true>
{ return std::complex<T>(ei_real(x)*ei_real(y) - ei_imag(x)*ei_imag(y), - ei_real(x)*ei_imag(y) - ei_imag(x)*ei_real(y)); }
};
// lightweight helper class to access matrix coefficients
// Lightweight helper class to access matrix coefficients.
// Yes, this is somewhat redundant with Map<>, but this version is much lighter,
// which should hopefully improve compilation (both compile time and generated code quality).
template<typename Scalar, int StorageOrder>
class ei_blas_data_mapper
{
@ -121,12 +123,6 @@ class ei_const_blas_data_mapper
int m_stride;
};
//
// template <int L2MemorySize,typename Scalar>
// struct ei_L2_block_traits {
// enum {width = 8 * ei_meta_sqrt<L2MemorySize/(64*sizeof(Scalar))>::ret };
// };
// Defines various constant controlling level 3 blocking
template<typename Scalar>
struct ei_product_blocking_traits
@ -160,71 +156,93 @@ struct ei_product_blocking_traits
template<typename XprType> struct ei_blas_traits
{
typedef typename ei_traits<XprType>::Scalar Scalar;
typedef XprType ActualXprType;
typedef const XprType& ExtractType;
typedef XprType _ExtractType;
enum {
IsComplex = NumTraits<Scalar>::IsComplex,
NeedToConjugate = false,
ActualAccess = int(ei_traits<XprType>::Flags)&DirectAccessBit ? HasDirectAccess : NoDirectAccess
};
typedef typename ei_meta_if<int(ActualAccess)==HasDirectAccess,
const ActualXprType&,
typename ActualXprType::PlainMatrixType
ExtractType,
typename _ExtractType::PlainMatrixType
>::ret DirectLinearAccessType;
static inline const ActualXprType& extract(const XprType& x) { return x; }
static inline ExtractType extract(const XprType& x) { return x; }
static inline Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
};
// pop conjugate
template<typename Scalar, typename NestedXpr> struct ei_blas_traits<CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, NestedXpr> >
template<typename Scalar, typename NestedXpr>
struct ei_blas_traits<CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, NestedXpr> >
: ei_blas_traits<NestedXpr>
{
typedef ei_blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ActualXprType ActualXprType;
typedef typename Base::ExtractType ExtractType;
enum {
IsComplex = NumTraits<Scalar>::IsComplex,
NeedToConjugate = IsComplex
};
static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); }
static inline ExtractType extract(const XprType& x) { return Base::extract(x._expression()); }
static inline Scalar extractScalarFactor(const XprType& x) { return ei_conj(Base::extractScalarFactor(x._expression())); }
};
// pop scalar multiple
template<typename Scalar, typename NestedXpr> struct ei_blas_traits<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, NestedXpr> >
template<typename Scalar, typename NestedXpr>
struct ei_blas_traits<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, NestedXpr> >
: ei_blas_traits<NestedXpr>
{
typedef ei_blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ActualXprType ActualXprType;
static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); }
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x._expression()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return x._functor().m_other * Base::extractScalarFactor(x._expression()); }
};
// pop opposite
template<typename Scalar, typename NestedXpr> struct ei_blas_traits<CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, NestedXpr> >
template<typename Scalar, typename NestedXpr>
struct ei_blas_traits<CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, NestedXpr> >
: ei_blas_traits<NestedXpr>
{
typedef ei_blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ActualXprType ActualXprType;
static inline const ActualXprType& extract(const XprType& x) { return Base::extract(x._expression()); }
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x._expression()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return - Base::extractScalarFactor(x._expression()); }
};
// pop opposite
template<typename NestedXpr> struct ei_blas_traits<NestByValue<NestedXpr> >
// pop NestByValue
template<typename NestedXpr>
struct ei_blas_traits<NestByValue<NestedXpr> >
: ei_blas_traits<NestedXpr>
{
typedef typename NestedXpr::Scalar Scalar;
typedef ei_blas_traits<NestedXpr> Base;
typedef NestByValue<NestedXpr> XprType;
typedef typename Base::ActualXprType ActualXprType;
static inline const ActualXprType& extract(const XprType& x) { return Base::extract(static_cast<const NestedXpr&>(x)); }
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(static_cast<const NestedXpr&>(x)); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return Base::extractScalarFactor(static_cast<const NestedXpr&>(x)); }
};
// pop/push transpose
//
// Specialization of ei_blas_traits for Transpose<NestedXpr>. The transpose is
// "popped" so that NestedXpr can be analyzed by the base traits, and then
// "pushed" back around whatever expression the base traits extract.
template<typename NestedXpr>
struct ei_blas_traits<Transpose<NestedXpr> >
: ei_blas_traits<NestedXpr>
{
typedef typename NestedXpr::Scalar Scalar;
typedef ei_blas_traits<NestedXpr> Base;
typedef Transpose<NestedXpr> XprType;
// The extracted expression is the transpose of what Base extracts from NestedXpr.
typedef Transpose<typename Base::_ExtractType> ExtractType;
// _ExtractType must be redefined at this level. Otherwise a specialization
// deriving from this one (e.g. for Transpose<Transpose<M> >) would see the
// _ExtractType inherited from ei_blas_traits<NestedXpr> and build an
// ExtractType with one transposition missing.
typedef Transpose<typename Base::_ExtractType> _ExtractType;
// Type to use when a direct pointer/stride access is required: the extracted
// expression itself if the nested expression has direct access, otherwise a
// plain matrix holding its evaluation.
typedef typename ei_meta_if<int(Base::ActualAccess)==HasDirectAccess,
ExtractType,
typename ExtractType::PlainMatrixType
>::ret DirectLinearAccessType;
// Returns, by value, the transpose of the expression extracted from the nested expression of x.
static inline const ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x._expression())); }
// Transposition does not alter the scalar factor, so it is forwarded from the nested expression.
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x._expression()); }
};
#endif // EIGEN_BLASUTIL_H