allow vectorization of mat44.col() by adding a InnerPanel boolean

template parameter to Block
This commit is contained in:
Gael Guennebaud 2010-07-23 16:29:29 +02:00
parent 853c0e15df
commit aa2b46aa91
6 changed files with 30 additions and 17 deletions

View File

@ -58,8 +58,8 @@
* *
* \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock
*/ */
template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess>
struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_traits<XprType> struct ei_traits<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> > : ei_traits<XprType>
{ {
typedef typename ei_traits<XprType>::Scalar Scalar; typedef typename ei_traits<XprType>::Scalar Scalar;
typedef typename ei_traits<XprType>::StorageKind StorageKind; typedef typename ei_traits<XprType>::StorageKind StorageKind;
@ -92,15 +92,16 @@ struct ei_traits<Block<XprType, BlockRows, BlockCols, HasDirectAccess> > : ei_tr
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0) MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1) && (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0, ? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && ((OuterStrideAtCompileTime % ei_packet_traits<Scalar>::size) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit), Flags0 = ei_traits<XprType>::Flags & (HereditaryBits | MaskPacketAccessBit | LvalueBit | DirectAccessBit | MaskAlignedBit),
Flags1 = Flags0 | FlagsLinearAccessBit, Flags1 = Flags0 | FlagsLinearAccessBit,
Flags = (Flags1 & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0) Flags = (Flags1 & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0)
}; };
}; };
template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> class Block template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool HasDirectAccess> class Block
: public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, HasDirectAccess> >::type : public ei_dense_xpr_base<Block<XprType, BlockRows, BlockCols, InnerPanel, HasDirectAccess> >::type
{ {
public: public:
@ -229,9 +230,9 @@ template<typename XprType, int BlockRows, int BlockCols, bool HasDirectAccess> c
}; };
/** \internal */ /** \internal */
template<typename XprType, int BlockRows, int BlockCols> template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
class Block<XprType,BlockRows,BlockCols,true> class Block<XprType,BlockRows,BlockCols, InnerPanel,true>
: public MapBase<Block<XprType, BlockRows, BlockCols,true> > : public MapBase<Block<XprType, BlockRows, BlockCols, InnerPanel, true> >
{ {
public: public:

View File

@ -233,17 +233,17 @@ template<typename Derived> class DenseBase
/** \internal the return type of MatrixBase::eigenvalues() */ /** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType; typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
/** \internal expression type of a column */ /** \internal expression type of a column */
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr; typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1, !IsRowMajor> ColXpr;
/** \internal expression type of a row */ /** \internal expression type of a row */
typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr; typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> RowXpr;
/** \internal expression type of a block of whole columns */ /** \internal expression type of a block of whole columns */
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, Dynamic> ColsBlockXpr; typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, Dynamic, !IsRowMajor> ColsBlockXpr;
/** \internal expression type of a block of whole rows */ /** \internal expression type of a block of whole rows */
typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime> RowsBlockXpr; typedef Block<Derived, Dynamic, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> RowsBlockXpr;
/** \internal expression type of a block of whole columns */ /** \internal expression type of a block of whole columns */
template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N> Type; }; template<int N> struct NColsBlockXpr { typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, N, !IsRowMajor> Type; };
/** \internal expression type of a block of whole rows */ /** \internal expression type of a block of whole rows */
template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime> Type; }; template<int N> struct NRowsBlockXpr { typedef Block<Derived, N, ei_traits<Derived>::ColsAtCompileTime, IsRowMajor> Type; };
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN

View File

@ -100,7 +100,7 @@ struct ei_traits<Map<PlainObjectType, MapOptions, StrideType> >
|| ( OuterStrideAtCompileTime!=Dynamic || ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
Flags0 = ei_traits<PlainObjectType>::Flags, Flags0 = ei_traits<PlainObjectType>::Flags,
Flags1 = IsAligned ? int(Flags0) | AlignedBit : int(Flags0) & ~AlignedBit, Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit), Flags2 = HasNoStride ? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
}; };

View File

@ -373,7 +373,7 @@ Packet4f ei_pcos<Packet4f>(const Packet4f& _x)
return _mm_xor_ps(y, sign_bit); return _mm_xor_ps(y, sign_bit);
} }
// This is Quake3's fast inverse square root. // This is based on Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/ // For detail see here: http://www.beyond3d.com/content/articles/8/
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f ei_psqrt<Packet4f>(const Packet4f& _x) Packet4f ei_psqrt<Packet4f>(const Packet4f& _x)

View File

@ -60,7 +60,7 @@ template<typename ExpressionType> class NestByValue;
template<typename ExpressionType> class ForceAlignedAccess; template<typename ExpressionType> class ForceAlignedAccess;
template<typename ExpressionType> class SwapWrapper; template<typename ExpressionType> class SwapWrapper;
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
bool HasDirectAccess = ei_has_direct_access<XprType>::ret> class Block; bool HasDirectAccess = ei_has_direct_access<XprType>::ret> class Block;
template<typename MatrixType, int Size=Dynamic> class VectorBlock; template<typename MatrixType, int Size=Dynamic> class VectorBlock;

View File

@ -149,8 +149,20 @@ template<typename Scalar, bool Enable = ei_packet_traits<Scalar>::Vectorizable>
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
LinearTraversal,CompleteUnrolling)); LinearTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3),
InnerVectorizedTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix44r().row(2),Matrix44r().row(1)+Matrix44r().row(1),
InnerVectorizedTraversal,CompleteUnrolling));
if(PacketSize>1) if(PacketSize>1)
{ {
typedef Matrix<Scalar,3,3,ColMajor> Matrix33c;
VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
LinearTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
LinearTraversal,CompleteUnrolling));
VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()), VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()),
LinearVectorizedTraversal,CompleteUnrolling)); LinearVectorizedTraversal,CompleteUnrolling));