mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 09:39:34 +08:00
* Big change in Block and Map:
- added a MapBase base xpr on top of which Map and the specialization of Block are implemented - MapBase forces both aligned loads (and aligned stores, see below) in expressions such as "x.block(...) += other_expr" * Significant vectorization improvement: - added an AlignedBit flag meaning the first coeff/packet is aligned, this allows us to avoid generating extra code to deal with the first unaligned part - removed all unaligned stores when there is no unrolling - removed unaligned loads in Sum when the input has the DirectAccessBit flag * Some code simplification in CacheFriendly product * Some minor documentation improvements
This commit is contained in:
parent
becbeda50a
commit
4fa40367e9
@ -45,6 +45,8 @@ namespace Eigen {
|
|||||||
#include "src/Core/Product.h"
|
#include "src/Core/Product.h"
|
||||||
#include "src/Core/DiagonalProduct.h"
|
#include "src/Core/DiagonalProduct.h"
|
||||||
#include "src/Core/InverseProduct.h"
|
#include "src/Core/InverseProduct.h"
|
||||||
|
#include "src/Core/MapBase.h"
|
||||||
|
#include "src/Core/Map.h"
|
||||||
#include "src/Core/Block.h"
|
#include "src/Core/Block.h"
|
||||||
#include "src/Core/Minor.h"
|
#include "src/Core/Minor.h"
|
||||||
#include "src/Core/Transpose.h"
|
#include "src/Core/Transpose.h"
|
||||||
@ -54,7 +56,6 @@ namespace Eigen {
|
|||||||
#include "src/Core/Redux.h"
|
#include "src/Core/Redux.h"
|
||||||
#include "src/Core/Visitor.h"
|
#include "src/Core/Visitor.h"
|
||||||
#include "src/Core/Fuzzy.h"
|
#include "src/Core/Fuzzy.h"
|
||||||
#include "src/Core/Map.h"
|
|
||||||
#include "src/Core/IO.h"
|
#include "src/Core/IO.h"
|
||||||
#include "src/Core/Swap.h"
|
#include "src/Core/Swap.h"
|
||||||
#include "src/Core/CommaInitializer.h"
|
#include "src/Core/CommaInitializer.h"
|
||||||
|
@ -34,6 +34,13 @@
|
|||||||
template <typename Derived, typename OtherDerived>
|
template <typename Derived, typename OtherDerived>
|
||||||
struct ei_assign_traits
|
struct ei_assign_traits
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
enum {
|
||||||
|
DstIsAligned = Derived::Flags & AlignedBit,
|
||||||
|
SrcIsAligned = OtherDerived::Flags & AlignedBit,
|
||||||
|
SrcAlignment = DstIsAligned && SrcIsAligned ? Aligned : Unaligned
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum {
|
enum {
|
||||||
InnerSize = int(Derived::Flags)&RowMajorBit
|
InnerSize = int(Derived::Flags)&RowMajorBit
|
||||||
@ -48,7 +55,8 @@ private:
|
|||||||
enum {
|
enum {
|
||||||
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
|
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
|
||||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
||||||
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0,
|
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||||
|
&& int(DstIsAligned) && int(SrcIsAligned),
|
||||||
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||||
MaySliceVectorize = MightVectorize && int(InnerMaxSize)==Dynamic /* slice vectorization can be slow, so we only
|
MaySliceVectorize = MightVectorize && int(InnerMaxSize)==Dynamic /* slice vectorization can be slow, so we only
|
||||||
want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case
|
want it if the slices are big, which is indicated by InnerMaxSize rather than InnerSize, think of the case
|
||||||
@ -79,7 +87,7 @@ public:
|
|||||||
: int(NoUnrolling)
|
: int(NoUnrolling)
|
||||||
)
|
)
|
||||||
: int(Vectorization) == int(LinearVectorization)
|
: int(Vectorization) == int(LinearVectorization)
|
||||||
? ( int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
|
? ( int(MayUnrollCompletely) && int(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
|
||||||
: int(NoUnrolling)
|
: int(NoUnrolling)
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@ -154,7 +162,7 @@ struct ei_assign_innervec_CompleteUnrolling
|
|||||||
|
|
||||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||||
{
|
{
|
||||||
dst.template copyPacket<Derived2, Aligned>(row, col, src);
|
dst.template copyPacket<Derived2, Aligned, Aligned>(row, col, src);
|
||||||
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
|
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2,
|
||||||
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
|
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
|
||||||
}
|
}
|
||||||
@ -173,7 +181,7 @@ struct ei_assign_innervec_InnerUnrolling
|
|||||||
{
|
{
|
||||||
const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
|
const int row = int(Derived1::Flags)&RowMajorBit ? row_or_col : Index;
|
||||||
const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
|
const int col = int(Derived1::Flags)&RowMajorBit ? Index : row_or_col;
|
||||||
dst.template copyPacket<Derived2, Aligned>(row, col, src);
|
dst.template copyPacket<Derived2, Aligned, Aligned>(row, col, src);
|
||||||
ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
|
ei_assign_innervec_InnerUnrolling<Derived1, Derived2,
|
||||||
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
|
Index+ei_packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, row_or_col);
|
||||||
}
|
}
|
||||||
@ -256,9 +264,9 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
|
|||||||
for(int i = 0; i < innerSize; i+=packetSize)
|
for(int i = 0; i < innerSize; i+=packetSize)
|
||||||
{
|
{
|
||||||
if(int(Derived1::Flags)&RowMajorBit)
|
if(int(Derived1::Flags)&RowMajorBit)
|
||||||
dst.template copyPacket<Derived2, Aligned>(j, i, src);
|
dst.template copyPacket<Derived2, Aligned, Aligned>(j, i, src);
|
||||||
else
|
else
|
||||||
dst.template copyPacket<Derived2, Aligned>(i, j, src);
|
dst.template copyPacket<Derived2, Aligned, Aligned>(i, j, src);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -298,14 +306,19 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
|||||||
{
|
{
|
||||||
const int size = dst.size();
|
const int size = dst.size();
|
||||||
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||||
const int alignedSize = (size/packetSize)*packetSize;
|
const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||||
|
: ei_alignmentOffset(&dst.coeffRef(0), size);
|
||||||
|
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||||
|
|
||||||
for(int index = 0; index < alignedSize; index += packetSize)
|
for(int index = 0; index < alignedStart; index++)
|
||||||
|
dst.copyCoeff(index, src);
|
||||||
|
|
||||||
|
for(int index = alignedStart; index < alignedEnd; index += packetSize)
|
||||||
{
|
{
|
||||||
dst.template copyPacket<Derived2, Aligned>(index, src);
|
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int index = alignedSize; index < size; index++)
|
for(int index = alignedEnd; index < size; index++)
|
||||||
dst.copyCoeff(index, src);
|
dst.copyCoeff(index, src);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -334,29 +347,45 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
|
|||||||
static void run(Derived1 &dst, const Derived2 &src)
|
static void run(Derived1 &dst, const Derived2 &src)
|
||||||
{
|
{
|
||||||
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||||
|
const int packetAlignedMask = packetSize - 1;
|
||||||
const int innerSize = dst.innerSize();
|
const int innerSize = dst.innerSize();
|
||||||
const int outerSize = dst.outerSize();
|
const int outerSize = dst.outerSize();
|
||||||
const int alignedInnerSize = (innerSize/packetSize)*packetSize;
|
const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
|
||||||
|
int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||||
|
: ei_alignmentOffset(&dst.coeffRef(0), innerSize);
|
||||||
|
|
||||||
for(int i = 0; i < outerSize; i++)
|
for(int i = 0; i < outerSize; i++)
|
||||||
{
|
{
|
||||||
// do the vectorizable part of the assignment
|
const int alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
|
||||||
for (int index = 0; index<alignedInnerSize ; index+=packetSize)
|
|
||||||
{
|
|
||||||
if(Derived1::Flags&RowMajorBit)
|
|
||||||
dst.template copyPacket<Derived2, Unaligned>(i, index, src);
|
|
||||||
else
|
|
||||||
dst.template copyPacket<Derived2, Unaligned>(index, i, src);
|
|
||||||
}
|
|
||||||
|
|
||||||
// do the non-vectorizable part of the assignment
|
// do the non-vectorizable part of the assignment
|
||||||
for (int index = alignedInnerSize; index<innerSize ; index++)
|
for (int index = 0; index<alignedStart ; index++)
|
||||||
{
|
{
|
||||||
if(Derived1::Flags&RowMajorBit)
|
if(Derived1::Flags&RowMajorBit)
|
||||||
dst.copyCoeff(i, index, src);
|
dst.copyCoeff(i, index, src);
|
||||||
else
|
else
|
||||||
dst.copyCoeff(index, i, src);
|
dst.copyCoeff(index, i, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// do the vectorizable part of the assignment
|
||||||
|
for (int index = alignedStart; index<alignedEnd; index+=packetSize)
|
||||||
|
{
|
||||||
|
if(Derived1::Flags&RowMajorBit)
|
||||||
|
dst.template copyPacket<Derived2, Aligned, Unaligned>(i, index, src);
|
||||||
|
else
|
||||||
|
dst.template copyPacket<Derived2, Aligned, Unaligned>(index, i, src);
|
||||||
|
}
|
||||||
|
|
||||||
|
// do the non-vectorizable part of the assignment
|
||||||
|
for (int index = alignedEnd; index<innerSize ; index++)
|
||||||
|
{
|
||||||
|
if(Derived1::Flags&RowMajorBit)
|
||||||
|
dst.copyCoeff(i, index, src);
|
||||||
|
else
|
||||||
|
dst.copyCoeff(index, i, src);
|
||||||
|
}
|
||||||
|
|
||||||
|
alignedStart = (alignedStart+alignedStep)%packetSize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -33,6 +33,8 @@
|
|||||||
* \param MatrixType the type of the object in which we are taking a block
|
* \param MatrixType the type of the object in which we are taking a block
|
||||||
* \param BlockRows the number of rows of the block we are taking at compile time (optional)
|
* \param BlockRows the number of rows of the block we are taking at compile time (optional)
|
||||||
* \param BlockCols the number of columns of the block we are taking at compile time (optional)
|
* \param BlockCols the number of columns of the block we are taking at compile time (optional)
|
||||||
|
* \param _PacketAccess
|
||||||
|
* \param _DirectAccessStatus \internal used for partial specialization
|
||||||
*
|
*
|
||||||
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
|
* This class represents an expression of either a fixed-size or dynamic-size block. It is the return
|
||||||
* type of MatrixBase::block(int,int,int,int) and MatrixBase::block<int,int>(int,int) and
|
* type of MatrixBase::block(int,int,int,int) and MatrixBase::block<int,int>(int,int) and
|
||||||
@ -56,8 +58,8 @@
|
|||||||
*
|
*
|
||||||
* \sa MatrixBase::block(int,int,int,int), MatrixBase::block(int,int), class VectorBlock
|
* \sa MatrixBase::block(int,int,int,int), MatrixBase::block(int,int), class VectorBlock
|
||||||
*/
|
*/
|
||||||
template<typename MatrixType, int BlockRows, int BlockCols, int DirectAccesStatus>
|
template<typename MatrixType, int BlockRows, int BlockCols, int _PacketAccess, int _DirectAccessStatus>
|
||||||
struct ei_traits<Block<MatrixType, BlockRows, BlockCols, DirectAccesStatus> >
|
struct ei_traits<Block<MatrixType, BlockRows, BlockCols, _PacketAccess, _DirectAccessStatus> >
|
||||||
{
|
{
|
||||||
typedef typename MatrixType::Scalar Scalar;
|
typedef typename MatrixType::Scalar Scalar;
|
||||||
enum{
|
enum{
|
||||||
@ -74,17 +76,21 @@ struct ei_traits<Block<MatrixType, BlockRows, BlockCols, DirectAccesStatus> >
|
|||||||
RowMajor = int(MatrixType::Flags)&RowMajorBit,
|
RowMajor = int(MatrixType::Flags)&RowMajorBit,
|
||||||
InnerSize = RowMajor ? ColsAtCompileTime : RowsAtCompileTime,
|
InnerSize = RowMajor ? ColsAtCompileTime : RowsAtCompileTime,
|
||||||
InnerMaxSize = RowMajor ? MaxColsAtCompileTime : MaxRowsAtCompileTime,
|
InnerMaxSize = RowMajor ? MaxColsAtCompileTime : MaxRowsAtCompileTime,
|
||||||
MaskPacketAccessBit = (InnerMaxSize == Dynamic || (InnerSize % ei_packet_traits<Scalar>::size) == 0)
|
MaskPacketAccessBit = (InnerMaxSize == Dynamic || (InnerSize >= ei_packet_traits<Scalar>::size))
|
||||||
? PacketAccessBit : 0,
|
? PacketAccessBit : 0,
|
||||||
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
|
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
|
||||||
Flags = (MatrixType::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit) & MaskLargeBit)
|
Flags = (MatrixType::Flags & (HereditaryBits | MaskPacketAccessBit | DirectAccessBit) & MaskLargeBit)
|
||||||
| FlagsLinearAccessBit,
|
| FlagsLinearAccessBit,
|
||||||
CoeffReadCost = MatrixType::CoeffReadCost
|
CoeffReadCost = MatrixType::CoeffReadCost,
|
||||||
|
PacketAccess = _PacketAccess
|
||||||
};
|
};
|
||||||
|
typedef typename ei_meta_if<int(PacketAccess)==Aligned,
|
||||||
|
Block<MatrixType, BlockRows, BlockCols, _PacketAccess, _DirectAccessStatus>&,
|
||||||
|
Block<MatrixType, BlockRows, BlockCols, Aligned, _DirectAccessStatus> >::ret AlignedDerivedType;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename MatrixType, int BlockRows, int BlockCols, int DirectAccesStatus> class Block
|
template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess, int _DirectAccessStatus> class Block
|
||||||
: public MatrixBase<Block<MatrixType, BlockRows, BlockCols, DirectAccesStatus> >
|
: public MatrixBase<Block<MatrixType, BlockRows, BlockCols, PacketAccess, _DirectAccessStatus> >
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@ -205,26 +211,36 @@ template<typename MatrixType, int BlockRows, int BlockCols, int DirectAccesStatu
|
|||||||
};
|
};
|
||||||
|
|
||||||
/** \internal */
|
/** \internal */
|
||||||
template<typename MatrixType, int BlockRows, int BlockCols> class Block<MatrixType,BlockRows,BlockCols,HasDirectAccess>
|
template<typename MatrixType, int BlockRows, int BlockCols, int PacketAccess>
|
||||||
: public MatrixBase<Block<MatrixType, BlockRows, BlockCols,HasDirectAccess> >
|
class Block<MatrixType,BlockRows,BlockCols,PacketAccess,HasDirectAccess>
|
||||||
|
: public MapBase<Block<MatrixType, BlockRows, BlockCols,PacketAccess,HasDirectAccess> >
|
||||||
{
|
{
|
||||||
enum {
|
|
||||||
IsRowMajor = int(ei_traits<MatrixType>::Flags)&RowMajorBit ? 1 : 0
|
|
||||||
};
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Block)
|
_EIGEN_GENERIC_PUBLIC_INTERFACE(Block, MapBase<Block>)
|
||||||
|
|
||||||
|
typedef typename ei_traits<Block>::AlignedDerivedType AlignedDerivedType;
|
||||||
|
|
||||||
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
|
||||||
|
|
||||||
|
AlignedDerivedType allowAligned()
|
||||||
|
{
|
||||||
|
if (PacketAccess==Aligned)
|
||||||
|
return *this;
|
||||||
|
else
|
||||||
|
return Block<MatrixType,BlockRows,BlockCols,Aligned,HasDirectAccess>
|
||||||
|
(m_matrix, Base::m_data, Base::m_rows.value(), Base::m_cols.value());
|
||||||
|
}
|
||||||
|
|
||||||
/** Column or Row constructor
|
/** Column or Row constructor
|
||||||
*/
|
*/
|
||||||
inline Block(const MatrixType& matrix, int i)
|
inline Block(const MatrixType& matrix, int i)
|
||||||
: m_matrix(matrix),
|
: Base(&matrix.const_cast_derived().coeffRef(
|
||||||
m_data_ptr(&matrix.const_cast_derived().coeffRef(
|
(BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) ? i : 0,
|
||||||
(BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) ? i : 0,
|
(BlockRows==MatrixType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
|
||||||
(BlockRows==MatrixType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)),
|
BlockRows==1 ? 1 : matrix.rows(),
|
||||||
m_blockRows(matrix.rows()),
|
BlockCols==1 ? 1 : matrix.cols()),
|
||||||
m_blockCols(matrix.cols())
|
m_matrix(matrix)
|
||||||
{
|
{
|
||||||
ei_assert( (i>=0) && (
|
ei_assert( (i>=0) && (
|
||||||
((BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) && i<matrix.rows())
|
((BlockRows==1) && (BlockCols==MatrixType::ColsAtCompileTime) && i<matrix.rows())
|
||||||
@ -234,13 +250,10 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block<MatrixTy
|
|||||||
/** Fixed-size constructor
|
/** Fixed-size constructor
|
||||||
*/
|
*/
|
||||||
inline Block(const MatrixType& matrix, int startRow, int startCol)
|
inline Block(const MatrixType& matrix, int startRow, int startCol)
|
||||||
: m_matrix(matrix), m_data_ptr(&matrix.const_cast_derived().coeffRef(startRow,startCol)),
|
: Base(&matrix.const_cast_derived().coeffRef(startRow,startCol)), m_matrix(matrix)
|
||||||
m_blockRows(matrix.rows()), m_blockCols(matrix.cols())
|
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && RowsAtCompileTime!=Dynamic,this_method_is_only_for_fixed_size);
|
|
||||||
ei_assert(RowsAtCompileTime!=Dynamic && RowsAtCompileTime!=Dynamic);
|
|
||||||
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= matrix.rows()
|
ei_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= matrix.rows()
|
||||||
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= matrix.cols());
|
&& startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= matrix.cols());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Dynamic-size constructor
|
/** Dynamic-size constructor
|
||||||
@ -248,91 +261,25 @@ template<typename MatrixType, int BlockRows, int BlockCols> class Block<MatrixTy
|
|||||||
inline Block(const MatrixType& matrix,
|
inline Block(const MatrixType& matrix,
|
||||||
int startRow, int startCol,
|
int startRow, int startCol,
|
||||||
int blockRows, int blockCols)
|
int blockRows, int blockCols)
|
||||||
: m_matrix(matrix), m_data_ptr(&matrix.const_cast_derived().coeffRef(startRow,startCol)),
|
: Base(&matrix.const_cast_derived().coeffRef(startRow,startCol), blockRows, blockCols),
|
||||||
m_blockRows(blockRows), m_blockCols(blockCols)
|
m_matrix(matrix)
|
||||||
{
|
{
|
||||||
ei_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
|
ei_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
|
||||||
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
|
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
|
||||||
ei_assert(startRow >= 0 && blockRows >= 1 && startRow + blockRows <= matrix.rows()
|
ei_assert(startRow >= 0 && blockRows >= 1 && startRow + blockRows <= matrix.rows()
|
||||||
&& startCol >= 0 && blockCols >= 1 && startCol + blockCols <= matrix.cols());
|
&& startCol >= 0 && blockCols >= 1 && startCol + blockCols <= matrix.cols());
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block)
|
|
||||||
|
|
||||||
inline int rows() const { return m_blockRows.value(); }
|
|
||||||
inline int cols() const { return m_blockCols.value(); }
|
|
||||||
|
|
||||||
inline int stride(void) const { return m_matrix.stride(); }
|
inline int stride(void) const { return m_matrix.stride(); }
|
||||||
|
|
||||||
inline Scalar& coeffRef(int row, int col)
|
|
||||||
{
|
|
||||||
if (IsRowMajor)
|
|
||||||
return m_data_ptr[col + row * stride()];
|
|
||||||
else
|
|
||||||
return m_data_ptr[row + col * stride()];
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const Scalar coeff(int row, int col) const
|
|
||||||
{
|
|
||||||
if (IsRowMajor)
|
|
||||||
return m_data_ptr[col + row * stride()];
|
|
||||||
else
|
|
||||||
return m_data_ptr[row + col * stride()];
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Scalar& coeffRef(int index)
|
|
||||||
{
|
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Block);
|
|
||||||
return m_data_ptr[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const Scalar coeff(int index) const
|
|
||||||
{
|
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Block);
|
|
||||||
if ( (RowsAtCompileTime == 1) == IsRowMajor )
|
|
||||||
return m_data_ptr[index];
|
|
||||||
else
|
|
||||||
return m_data_ptr[index*stride()];
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int LoadMode>
|
|
||||||
inline PacketScalar packet(int row, int col) const
|
|
||||||
{
|
|
||||||
if (IsRowMajor)
|
|
||||||
return ei_ploadu(&m_data_ptr[col + row * stride()]);
|
|
||||||
else
|
|
||||||
return ei_ploadu(&m_data_ptr[row + col * stride()]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int LoadMode>
|
|
||||||
inline void writePacket(int row, int col, const PacketScalar& x)
|
|
||||||
{
|
|
||||||
if (IsRowMajor)
|
|
||||||
ei_pstoreu(&m_data_ptr[col + row * stride()], x);
|
|
||||||
else
|
|
||||||
ei_pstoreu(&m_data_ptr[row + col * stride()], x);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int LoadMode>
|
|
||||||
inline PacketScalar packet(int index) const
|
|
||||||
{
|
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Block);
|
|
||||||
return ei_ploadu(&m_data_ptr[index]);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int LoadMode>
|
|
||||||
inline void writePacket(int index, const PacketScalar& x)
|
|
||||||
{
|
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Block);
|
|
||||||
ei_pstoreu(&m_data_ptr[index], x);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
/** \internal used by allowAligned() */
|
||||||
|
inline Block(const MatrixType& matrix, const Scalar* data, int blockRows, int blockCols)
|
||||||
|
: Base(data, blockRows, blockCols), m_matrix(matrix)
|
||||||
|
{}
|
||||||
|
|
||||||
const typename MatrixType::Nested m_matrix;
|
const typename MatrixType::Nested m_matrix;
|
||||||
Scalar* m_data_ptr;
|
|
||||||
const ei_int_if_dynamic<RowsAtCompileTime> m_blockRows;
|
|
||||||
const ei_int_if_dynamic<ColsAtCompileTime> m_blockCols;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \returns a dynamic-size expression of a block in *this.
|
/** \returns a dynamic-size expression of a block in *this.
|
||||||
|
@ -420,15 +420,18 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector(
|
|||||||
ei_internal_assert((alignmentPattern==NoneAligned) || (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(Packet))==0);
|
ei_internal_assert((alignmentPattern==NoneAligned) || (size_t(lhs+alignedStart+lhsStride*skipColumns)%sizeof(Packet))==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int offset1 = (FirstAligned && alignmentStep==1?3:1);
|
||||||
|
int offset3 = (FirstAligned && alignmentStep==1?1:3);
|
||||||
|
|
||||||
int columnBound = ((rhs.size()-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
int columnBound = ((rhs.size()-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
|
||||||
for (int i=skipColumns; i<columnBound; i+=columnsAtOnce)
|
for (int i=skipColumns; i<columnBound; i+=columnsAtOnce)
|
||||||
{
|
{
|
||||||
Packet ptmp0 = ei_pset1(rhs[i]), ptmp1 = ei_pset1(rhs[i+1]),
|
Packet ptmp0 = ei_pset1(rhs[i]), ptmp1 = ei_pset1(rhs[i+offset1]),
|
||||||
ptmp2 = ei_pset1(rhs[i+2]), ptmp3 = ei_pset1(rhs[i+3]);
|
ptmp2 = ei_pset1(rhs[i+2]), ptmp3 = ei_pset1(rhs[i+offset3]);
|
||||||
|
|
||||||
// this helps a lot generating better binary code
|
// this helps a lot generating better binary code
|
||||||
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+1)*lhsStride,
|
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
||||||
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+3)*lhsStride;
|
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
|
||||||
|
|
||||||
if (PacketSize>1)
|
if (PacketSize>1)
|
||||||
{
|
{
|
||||||
@ -453,12 +456,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_colmajor_times_vector(
|
|||||||
if(peels>1)
|
if(peels>1)
|
||||||
{
|
{
|
||||||
Packet A00, A01, A02, A03, A10, A11, A12, A13;
|
Packet A00, A01, A02, A03, A10, A11, A12, A13;
|
||||||
if (alignmentStep==1)
|
|
||||||
{
|
|
||||||
A00 = ptmp1; ptmp1 = ptmp3; ptmp3 = A00;
|
|
||||||
const Scalar* aux = lhs1;
|
|
||||||
lhs1 = lhs3; lhs3 = aux;
|
|
||||||
}
|
|
||||||
|
|
||||||
A01 = ei_pload(&lhs1[alignedStart-1]);
|
A01 = ei_pload(&lhs1[alignedStart-1]);
|
||||||
A02 = ei_pload(&lhs2[alignedStart-2]);
|
A02 = ei_pload(&lhs2[alignedStart-2]);
|
||||||
@ -614,14 +611,17 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
|
|||||||
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0);
|
|| (size_t(lhs+alignedStart+lhsStride*skipRows)%sizeof(Packet))==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int offset1 = (FirstAligned && alignmentStep==1?3:1);
|
||||||
|
int offset3 = (FirstAligned && alignmentStep==1?1:3);
|
||||||
|
|
||||||
int rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
int rowBound = ((res.size()-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
|
||||||
for (int i=skipRows; i<rowBound; i+=rowsAtOnce)
|
for (int i=skipRows; i<rowBound; i+=rowsAtOnce)
|
||||||
{
|
{
|
||||||
Scalar tmp0 = Scalar(0), tmp1 = Scalar(0), tmp2 = Scalar(0), tmp3 = Scalar(0);
|
Scalar tmp0 = Scalar(0), tmp1 = Scalar(0), tmp2 = Scalar(0), tmp3 = Scalar(0);
|
||||||
|
|
||||||
// this helps the compiler generating good binary code
|
// this helps the compiler generating good binary code
|
||||||
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+1)*lhsStride,
|
const Scalar *lhs0 = lhs + i*lhsStride, *lhs1 = lhs + (i+offset1)*lhsStride,
|
||||||
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+3)*lhsStride;
|
*lhs2 = lhs + (i+2)*lhsStride, *lhs3 = lhs + (i+offset3)*lhsStride;
|
||||||
|
|
||||||
if (PacketSize>1)
|
if (PacketSize>1)
|
||||||
{
|
{
|
||||||
@ -658,13 +658,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
|
|||||||
* than basic unaligned loads.
|
* than basic unaligned loads.
|
||||||
*/
|
*/
|
||||||
Packet A01, A02, A03, b, A11, A12, A13;
|
Packet A01, A02, A03, b, A11, A12, A13;
|
||||||
if (alignmentStep==1)
|
|
||||||
{
|
|
||||||
// flip row #1 and #3
|
|
||||||
b = ptmp1; ptmp1 = ptmp3; ptmp3 = b;
|
|
||||||
const Scalar* aux = lhs1;
|
|
||||||
lhs1 = lhs3; lhs3 = aux;
|
|
||||||
}
|
|
||||||
A01 = ei_pload(&lhs1[alignedStart-1]);
|
A01 = ei_pload(&lhs1[alignedStart-1]);
|
||||||
A02 = ei_pload(&lhs2[alignedStart-2]);
|
A02 = ei_pload(&lhs2[alignedStart-2]);
|
||||||
A03 = ei_pload(&lhs3[alignedStart-3]);
|
A03 = ei_pload(&lhs3[alignedStart-3]);
|
||||||
@ -690,13 +683,6 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
|
|||||||
ptmp2 = ei_pmadd(b, A12, ptmp2);
|
ptmp2 = ei_pmadd(b, A12, ptmp2);
|
||||||
ptmp3 = ei_pmadd(b, A13, ptmp3);
|
ptmp3 = ei_pmadd(b, A13, ptmp3);
|
||||||
}
|
}
|
||||||
if (alignmentStep==1)
|
|
||||||
{
|
|
||||||
// restore rows #1 and #3
|
|
||||||
b = ptmp1; ptmp1 = ptmp3; ptmp3 = b;
|
|
||||||
const Scalar* aux = lhs1;
|
|
||||||
lhs1 = lhs3; lhs3 = aux;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for (int j = peeledSize; j<alignedSize; j+=PacketSize)
|
for (int j = peeledSize; j<alignedSize; j+=PacketSize)
|
||||||
_EIGEN_ACCUMULATE_PACKETS(,u,u,);
|
_EIGEN_ACCUMULATE_PACKETS(,u,u,);
|
||||||
@ -720,7 +706,7 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
|
|||||||
Scalar b = rhs[j];
|
Scalar b = rhs[j];
|
||||||
tmp0 += b*lhs0[j]; tmp1 += b*lhs1[j]; tmp2 += b*lhs2[j]; tmp3 += b*lhs3[j];
|
tmp0 += b*lhs0[j]; tmp1 += b*lhs1[j]; tmp2 += b*lhs2[j]; tmp3 += b*lhs3[j];
|
||||||
}
|
}
|
||||||
res[i] += tmp0; res[i+1] += tmp1; res[i+2] += tmp2; res[i+3] += tmp3;
|
res[i] += tmp0; res[i+offset1] += tmp1; res[i+2] += tmp2; res[i+offset3] += tmp3;
|
||||||
}
|
}
|
||||||
|
|
||||||
// process remaining first and last rows (at most columnsAtOnce-1)
|
// process remaining first and last rows (at most columnsAtOnce-1)
|
||||||
|
@ -298,22 +298,22 @@ inline void MatrixBase<Derived>::copyCoeff(int index, const MatrixBase<OtherDeri
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename OtherDerived, int LoadStoreMode>
|
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||||
inline void MatrixBase<Derived>::copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
|
inline void MatrixBase<Derived>::copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
ei_internal_assert(row >= 0 && row < rows()
|
ei_internal_assert(row >= 0 && row < rows()
|
||||||
&& col >= 0 && col < cols());
|
&& col >= 0 && col < cols());
|
||||||
derived().template writePacket<LoadStoreMode>(row, col,
|
derived().template writePacket<StoreMode>(row, col,
|
||||||
other.derived().template packet<LoadStoreMode>(row, col));
|
other.derived().template packet<LoadMode>(row, col));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
template<typename OtherDerived, int LoadStoreMode>
|
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||||
inline void MatrixBase<Derived>::copyPacket(int index, const MatrixBase<OtherDerived>& other)
|
inline void MatrixBase<Derived>::copyPacket(int index, const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
ei_internal_assert(index >= 0 && index < size());
|
ei_internal_assert(index >= 0 && index < size());
|
||||||
derived().template writePacket<LoadStoreMode>(index,
|
derived().template writePacket<StoreMode>(index,
|
||||||
other.derived().template packet<LoadStoreMode>(index));
|
other.derived().template packet<LoadMode>(index));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // EIGEN_COEFFS_H
|
#endif // EIGEN_COEFFS_H
|
||||||
|
@ -67,7 +67,7 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
|||||||
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
|
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
|
||||||
Flags = (int(LhsFlags) | int(RhsFlags)) & (
|
Flags = (int(LhsFlags) | int(RhsFlags)) & (
|
||||||
HereditaryBits
|
HereditaryBits
|
||||||
| (int(LhsFlags) & int(RhsFlags) & LinearAccessBit)
|
| (int(LhsFlags) & int(RhsFlags) & (LinearAccessBit | AlignedBit))
|
||||||
| (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
|
| (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
|
||||||
? (int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)),
|
? (int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)),
|
||||||
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
|
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
|
||||||
|
@ -55,7 +55,7 @@ struct ei_traits<CwiseUnaryOp<UnaryOp, MatrixType> >
|
|||||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||||
Flags = (MatrixTypeFlags & (
|
Flags = (MatrixTypeFlags & (
|
||||||
HereditaryBits | LinearAccessBit
|
HereditaryBits | LinearAccessBit | AlignedBit
|
||||||
| (ei_functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))),
|
| (ei_functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))),
|
||||||
CoeffReadCost = MatrixTypeCoeffReadCost + ei_functor_traits<UnaryOp>::Cost
|
CoeffReadCost = MatrixTypeCoeffReadCost + ei_functor_traits<UnaryOp>::Cost
|
||||||
};
|
};
|
||||||
|
@ -123,7 +123,9 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
|
|||||||
row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
|
row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index,
|
||||||
col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
|
col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0,
|
||||||
row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
|
row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index,
|
||||||
col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0
|
col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0,
|
||||||
|
alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned,
|
||||||
|
alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename Derived1::Scalar Scalar;
|
typedef typename Derived1::Scalar Scalar;
|
||||||
@ -131,7 +133,7 @@ struct ei_dot_vec_unroller<Derived1, Derived2, Index, Stop, true>
|
|||||||
|
|
||||||
inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
|
inline static PacketScalar run(const Derived1& v1, const Derived2& v2)
|
||||||
{
|
{
|
||||||
return ei_pmul(v1.template packet<Aligned>(row1, col1), v2.template packet<Aligned>(row2, col2));
|
return ei_pmul(v1.template packet<alignment1>(row1, col1), v2.template packet<alignment2>(row2, col2));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -175,20 +177,22 @@ struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
|||||||
const int size = v1.size();
|
const int size = v1.size();
|
||||||
const int packetSize = ei_packet_traits<Scalar>::size;
|
const int packetSize = ei_packet_traits<Scalar>::size;
|
||||||
const int alignedSize = (size/packetSize)*packetSize;
|
const int alignedSize = (size/packetSize)*packetSize;
|
||||||
|
const int alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned;
|
||||||
|
const int alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned;
|
||||||
Scalar res;
|
Scalar res;
|
||||||
|
|
||||||
// do the vectorizable part of the sum
|
// do the vectorizable part of the sum
|
||||||
if(size >= packetSize)
|
if(size >= packetSize)
|
||||||
{
|
{
|
||||||
PacketScalar packet_res = ei_pmul(
|
PacketScalar packet_res = ei_pmul(
|
||||||
v1.template packet<Aligned>(0),
|
v1.template packet<alignment1>(0),
|
||||||
v2.template packet<Aligned>(0)
|
v2.template packet<alignment2>(0)
|
||||||
);
|
);
|
||||||
for(int index = packetSize; index<alignedSize; index += packetSize)
|
for(int index = packetSize; index<alignedSize; index += packetSize)
|
||||||
{
|
{
|
||||||
packet_res = ei_pmadd(
|
packet_res = ei_pmadd(
|
||||||
v1.template packet<Aligned>(index),
|
v1.template packet<alignment1>(index),
|
||||||
v2.template packet<Aligned>(index),
|
v2.template packet<alignment2>(index),
|
||||||
packet_res
|
packet_res
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
// for linear algebra. Eigen itself is part of the KDE project.
|
// for linear algebra. Eigen itself is part of the KDE project.
|
||||||
//
|
//
|
||||||
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
|
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
//
|
//
|
||||||
// Eigen is free software; you can redistribute it and/or
|
// Eigen is free software; you can redistribute it and/or
|
||||||
// modify it under the terms of the GNU Lesser General Public
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
@ -29,8 +30,8 @@
|
|||||||
*
|
*
|
||||||
* \brief A matrix or vector expression mapping an existing array of data.
|
* \brief A matrix or vector expression mapping an existing array of data.
|
||||||
*
|
*
|
||||||
* \param Alignment can be either Aligned or Unaligned. Tells whether the array is suitably aligned for
|
* \param _PacketAccess controls whether vectorized aligned loads or stores are allowed (Aligned)
|
||||||
* vectorization on the present CPU architecture. Defaults to Unaligned.
|
* or forced to unaligned (Unaligned). Defaults to Unaligned.
|
||||||
*
|
*
|
||||||
* This class represents a matrix or vector expression mapping an existing array of data.
|
* This class represents a matrix or vector expression mapping an existing array of data.
|
||||||
* It can be used to let Eigen interface without any overhead with non-Eigen data structures,
|
* It can be used to let Eigen interface without any overhead with non-Eigen data structures,
|
||||||
@ -40,117 +41,43 @@
|
|||||||
*
|
*
|
||||||
* \sa Matrix::map()
|
* \sa Matrix::map()
|
||||||
*/
|
*/
|
||||||
template<typename MatrixType, int Alignment>
|
template<typename MatrixType, int _PacketAccess>
|
||||||
struct ei_traits<Map<MatrixType, Alignment> >
|
struct ei_traits<Map<MatrixType, _PacketAccess> > : public ei_traits<MatrixType>
|
||||||
{
|
{
|
||||||
typedef typename MatrixType::Scalar Scalar;
|
|
||||||
enum {
|
enum {
|
||||||
RowsAtCompileTime = MatrixType::RowsAtCompileTime,
|
PacketAccess = _PacketAccess,
|
||||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
Flags = ei_traits<MatrixType>::Flags & ~AlignedBit
|
||||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
|
||||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
|
||||||
Flags = MatrixType::Flags,
|
|
||||||
CoeffReadCost = NumTraits<Scalar>::ReadCost
|
|
||||||
};
|
};
|
||||||
|
typedef typename ei_meta_if<int(PacketAccess)==Aligned,
|
||||||
|
Map<MatrixType, _PacketAccess>&,
|
||||||
|
Map<MatrixType, Aligned> >::ret AlignedDerivedType;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename MatrixType, int Alignment> class Map
|
template<typename MatrixType, int PacketAccess> class Map
|
||||||
: public MatrixBase<Map<MatrixType, Alignment> >
|
: public MapBase<Map<MatrixType, PacketAccess> >
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Map)
|
_EIGEN_GENERIC_PUBLIC_INTERFACE(Map, MapBase<Map>)
|
||||||
|
typedef typename ei_traits<Map>::AlignedDerivedType AlignedDerivedType;
|
||||||
inline int rows() const { return m_rows.value(); }
|
|
||||||
inline int cols() const { return m_cols.value(); }
|
|
||||||
|
|
||||||
inline int stride() const { return this->innerSize(); }
|
inline int stride() const { return this->innerSize(); }
|
||||||
|
|
||||||
inline const Scalar& coeff(int row, int col) const
|
AlignedDerivedType allowAligned()
|
||||||
{
|
{
|
||||||
if(Flags & RowMajorBit)
|
if (PacketAccess==Aligned)
|
||||||
return m_data[col + row * m_cols.value()];
|
return *this;
|
||||||
else // column-major
|
else
|
||||||
return m_data[row + col * m_rows.value()];
|
return Map<MatrixType,Aligned>(Base::m_data, Base::m_rows.value(), Base::m_cols.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
inline Scalar& coeffRef(int row, int col)
|
inline Map(const Scalar* data) : Base(data) {}
|
||||||
{
|
|
||||||
if(Flags & RowMajorBit)
|
|
||||||
return const_cast<Scalar*>(m_data)[col + row * m_cols.value()];
|
|
||||||
else // column-major
|
|
||||||
return const_cast<Scalar*>(m_data)[row + col * m_rows.value()];
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const Scalar& coeff(int index) const
|
inline Map(const Scalar* data, int size) : Base(data, size) {}
|
||||||
{
|
|
||||||
return m_data[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Scalar& coeffRef(int index)
|
inline Map(const Scalar* data, int rows, int cols) : Base(data, rows, cols) {}
|
||||||
{
|
|
||||||
return *const_cast<Scalar*>(m_data + index);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int LoadMode>
|
|
||||||
inline PacketScalar packet(int row, int col) const
|
|
||||||
{
|
|
||||||
return ei_ploadt<Scalar, LoadMode == Aligned ? Alignment : Unaligned>
|
|
||||||
(m_data + (Flags & RowMajorBit
|
|
||||||
? col + row * m_cols.value()
|
|
||||||
: row + col * m_rows.value()));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int LoadMode>
|
|
||||||
inline PacketScalar packet(int index) const
|
|
||||||
{
|
|
||||||
return ei_ploadt<Scalar, LoadMode == Aligned ? Alignment : Unaligned>(m_data + index);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int StoreMode>
|
|
||||||
inline void writePacket(int row, int col, const PacketScalar& x)
|
|
||||||
{
|
|
||||||
ei_pstoret<Scalar, PacketScalar, StoreMode == Aligned ? Alignment : Unaligned>
|
|
||||||
(const_cast<Scalar*>(m_data) + (Flags & RowMajorBit
|
|
||||||
? col + row * m_cols.value()
|
|
||||||
: row + col * m_rows.value()), x);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int StoreMode>
|
|
||||||
inline void writePacket(int index, const PacketScalar& x)
|
|
||||||
{
|
|
||||||
ei_pstoret<Scalar, PacketScalar, StoreMode == Aligned ? Alignment : Unaligned>
|
|
||||||
(const_cast<Scalar*>(m_data) + index, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Map(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
|
|
||||||
{
|
|
||||||
EIGEN_STATIC_ASSERT_FIXED_SIZE(MatrixType)
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Map(const Scalar* data, int size)
|
|
||||||
: m_data(data),
|
|
||||||
m_rows(RowsAtCompileTime == Dynamic ? size : RowsAtCompileTime),
|
|
||||||
m_cols(ColsAtCompileTime == Dynamic ? size : ColsAtCompileTime)
|
|
||||||
{
|
|
||||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(MatrixType)
|
|
||||||
ei_assert(size > 0);
|
|
||||||
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Map(const Scalar* data, int rows, int cols)
|
|
||||||
: m_data(data), m_rows(rows), m_cols(cols)
|
|
||||||
{
|
|
||||||
ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
|
||||||
&& cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
|
||||||
}
|
|
||||||
|
|
||||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
|
||||||
|
|
||||||
protected:
|
|
||||||
const Scalar* m_data;
|
|
||||||
const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
|
|
||||||
const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Constructor copying an existing array of data.
|
/** Constructor copying an existing array of data.
|
||||||
|
167
Eigen/src/Core/MapBase.h
Normal file
167
Eigen/src/Core/MapBase.h
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra. Eigen itself is part of the KDE project.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
|
||||||
|
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
|
||||||
|
//
|
||||||
|
// Eigen is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
|
// License as published by the Free Software Foundation; either
|
||||||
|
// version 3 of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Alternatively, you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License as
|
||||||
|
// published by the Free Software Foundation; either version 2 of
|
||||||
|
// the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public
|
||||||
|
// License and a copy of the GNU General Public License along with
|
||||||
|
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#ifndef EIGEN_MAPBASE_H
|
||||||
|
#define EIGEN_MAPBASE_H
|
||||||
|
|
||||||
|
/** \internal
|
||||||
|
*
|
||||||
|
* \class MapBase
|
||||||
|
*
|
||||||
|
* \brief Base class for Map and Block expression with direct access
|
||||||
|
*
|
||||||
|
* \sa class Map, class Block
|
||||||
|
*/
|
||||||
|
template<typename Derived> class MapBase
|
||||||
|
: public MatrixBase<Derived>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
typedef MatrixBase<Derived> Base;
|
||||||
|
enum {
|
||||||
|
IsRowMajor = int(ei_traits<Derived>::Flags) & RowMajorBit ? 1 : 0,
|
||||||
|
PacketAccess = ei_traits<Derived>::PacketAccess,
|
||||||
|
RowsAtCompileTime = ei_traits<Derived>::RowsAtCompileTime,
|
||||||
|
ColsAtCompileTime = ei_traits<Derived>::ColsAtCompileTime,
|
||||||
|
SizeAtCompileTime = Base::SizeAtCompileTime
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef typename ei_traits<Derived>::AlignedDerivedType AlignedDerivedType;
|
||||||
|
typedef typename ei_traits<Derived>::Scalar Scalar;
|
||||||
|
typedef typename Base::PacketScalar PacketScalar;
|
||||||
|
using Base::derived;
|
||||||
|
|
||||||
|
inline int rows() const { return m_rows.value(); }
|
||||||
|
inline int cols() const { return m_cols.value(); }
|
||||||
|
|
||||||
|
inline int stride() const { return derived().stride(); }
|
||||||
|
AlignedDerivedType allowAligned() { return derived().allowAligned(); }
|
||||||
|
|
||||||
|
inline const Scalar& coeff(int row, int col) const
|
||||||
|
{
|
||||||
|
if(IsRowMajor)
|
||||||
|
return m_data[col + row * stride()];
|
||||||
|
else // column-major
|
||||||
|
return m_data[row + col * stride()];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Scalar& coeffRef(int row, int col)
|
||||||
|
{
|
||||||
|
if(IsRowMajor)
|
||||||
|
return const_cast<Scalar*>(m_data)[col + row * stride()];
|
||||||
|
else // column-major
|
||||||
|
return const_cast<Scalar*>(m_data)[row + col * stride()];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline const Scalar coeff(int index) const
|
||||||
|
{
|
||||||
|
ei_assert(Derived::IsVectorAtCompileTime || (ei_traits<Derived>::Flags & LinearAccessBit));
|
||||||
|
if ( ((RowsAtCompileTime == 1) == IsRowMajor) )
|
||||||
|
return m_data[index];
|
||||||
|
else
|
||||||
|
return m_data[index*stride()];
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Scalar& coeffRef(int index)
|
||||||
|
{
|
||||||
|
return *const_cast<Scalar*>(m_data + index);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int LoadMode>
|
||||||
|
inline PacketScalar packet(int row, int col) const
|
||||||
|
{
|
||||||
|
return ei_ploadt<Scalar, int(PacketAccess) == Aligned ? Aligned : LoadMode>
|
||||||
|
(m_data + (IsRowMajor ? col + row * stride()
|
||||||
|
: row + col * stride()));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int LoadMode>
|
||||||
|
inline PacketScalar packet(int index) const
|
||||||
|
{
|
||||||
|
return ei_ploadt<Scalar, int(PacketAccess) == Aligned ? Aligned : LoadMode>(m_data + index);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int StoreMode>
|
||||||
|
inline void writePacket(int row, int col, const PacketScalar& x)
|
||||||
|
{
|
||||||
|
ei_pstoret<Scalar, PacketScalar, int(PacketAccess) == Aligned ? Aligned : StoreMode>
|
||||||
|
(const_cast<Scalar*>(m_data) + (IsRowMajor ? col + row * stride()
|
||||||
|
: row + col * stride()), x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int StoreMode>
|
||||||
|
inline void writePacket(int index, const PacketScalar& x)
|
||||||
|
{
|
||||||
|
ei_pstoret<Scalar, PacketScalar, int(PacketAccess) == Aligned ? Aligned : StoreMode>
|
||||||
|
(const_cast<Scalar*>(m_data) + index, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline MapBase(const Scalar* data) : m_data(data), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
|
||||||
|
}
|
||||||
|
|
||||||
|
inline MapBase(const Scalar* data, int size)
|
||||||
|
: m_data(data),
|
||||||
|
m_rows(RowsAtCompileTime == Dynamic ? size : RowsAtCompileTime),
|
||||||
|
m_cols(ColsAtCompileTime == Dynamic ? size : ColsAtCompileTime)
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||||
|
ei_assert(size > 0);
|
||||||
|
ei_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == size);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline MapBase(const Scalar* data, int rows, int cols)
|
||||||
|
: m_data(data), m_rows(rows), m_cols(cols)
|
||||||
|
{
|
||||||
|
ei_assert(rows > 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows)
|
||||||
|
&& cols > 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols));
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MapBase)
|
||||||
|
|
||||||
|
// EIGEN_INHERIT_ASSIGNMENT_OPERATOR(MapBase, =)
|
||||||
|
|
||||||
|
template<typename OtherDerived>
|
||||||
|
Derived& operator+=(const MatrixBase<OtherDerived>& other)
|
||||||
|
{ return derived() = allowAligned() + other; }
|
||||||
|
|
||||||
|
template<typename OtherDerived>
|
||||||
|
Derived& operator-=(const MatrixBase<OtherDerived>& other)
|
||||||
|
{ return derived() = allowAligned() - other; }
|
||||||
|
|
||||||
|
Derived& operator*=(const Scalar& other)
|
||||||
|
{ return derived() = allowAligned() * other; }
|
||||||
|
|
||||||
|
Derived& operator/=(const Scalar& other)
|
||||||
|
{ return derived() = allowAligned() / other; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
const Scalar* __restrict__ m_data;
|
||||||
|
const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
|
||||||
|
const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // EIGEN_MAPBASE_H
|
@ -257,9 +257,9 @@ template<typename Derived> class MatrixBase
|
|||||||
void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other);
|
void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other);
|
||||||
template<typename OtherDerived>
|
template<typename OtherDerived>
|
||||||
void copyCoeff(int index, const MatrixBase<OtherDerived>& other);
|
void copyCoeff(int index, const MatrixBase<OtherDerived>& other);
|
||||||
template<typename OtherDerived, int LoadStoreMode>
|
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||||
void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other);
|
void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other);
|
||||||
template<typename OtherDerived, int LoadStoreMode>
|
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||||
void copyPacket(int index, const MatrixBase<OtherDerived>& other);
|
void copyPacket(int index, const MatrixBase<OtherDerived>& other);
|
||||||
|
|
||||||
template<int LoadMode>
|
template<int LoadMode>
|
||||||
|
@ -33,17 +33,22 @@
|
|||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
struct ei_sum_traits
|
struct ei_sum_traits
|
||||||
{
|
{
|
||||||
|
private:
|
||||||
|
enum {
|
||||||
|
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
|
||||||
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
enum {
|
enum {
|
||||||
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
|
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
|
||||||
&& (int(Derived::Flags)&LinearAccessBit)
|
&& (int(Derived::Flags)&LinearAccessBit)
|
||||||
|
&& (int(Derived::SizeAtCompileTime)>2*PacketSize)
|
||||||
? LinearVectorization
|
? LinearVectorization
|
||||||
: NoVectorization
|
: NoVectorization
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum {
|
enum {
|
||||||
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
|
|
||||||
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
|
Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost
|
||||||
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
|
+ (Derived::SizeAtCompileTime-1) * NumTraits<typename Derived::Scalar>::AddCost,
|
||||||
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
|
UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize))
|
||||||
@ -131,7 +136,8 @@ struct ei_sum_vec_unroller<Derived, Index, Stop, true>
|
|||||||
: Index % Derived::RowsAtCompileTime,
|
: Index % Derived::RowsAtCompileTime,
|
||||||
col = int(Derived::Flags)&RowMajorBit
|
col = int(Derived::Flags)&RowMajorBit
|
||||||
? Index % int(Derived::ColsAtCompileTime)
|
? Index % int(Derived::ColsAtCompileTime)
|
||||||
: Index / Derived::RowsAtCompileTime
|
: Index / Derived::RowsAtCompileTime,
|
||||||
|
alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef typename Derived::Scalar Scalar;
|
typedef typename Derived::Scalar Scalar;
|
||||||
@ -139,7 +145,7 @@ struct ei_sum_vec_unroller<Derived, Index, Stop, true>
|
|||||||
|
|
||||||
inline static PacketScalar run(const Derived &mat)
|
inline static PacketScalar run(const Derived &mat)
|
||||||
{
|
{
|
||||||
return mat.template packet<Aligned>(row, col);
|
return mat.template packet<alignment>(row, col);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -185,14 +191,21 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
|
|||||||
{
|
{
|
||||||
const int size = mat.size();
|
const int size = mat.size();
|
||||||
const int packetSize = ei_packet_traits<Scalar>::size;
|
const int packetSize = ei_packet_traits<Scalar>::size;
|
||||||
const int alignedSize = (size/packetSize)*packetSize;
|
const int alignedStart = (Derived::Flags & AlignedBit)
|
||||||
|
|| !(Derived::Flags & DirectAccessBit)
|
||||||
|
? 0
|
||||||
|
: ei_alignmentOffset(&mat.const_cast_derived().coeffRef(0), size);
|
||||||
|
const int alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
|
||||||
|
? Aligned : Unaligned;
|
||||||
|
const int alignedSize = ((size-alignedStart)/packetSize)*packetSize;
|
||||||
|
const int alignedEnd = alignedStart + alignedSize;
|
||||||
Scalar res;
|
Scalar res;
|
||||||
|
|
||||||
if(size >= packetSize)
|
if(Derived::SizeAtCompileTime>=2*packetSize && alignedSize >= 2*packetSize)
|
||||||
{
|
{
|
||||||
PacketScalar packet_res = mat.template packet<Aligned>(0, 0);
|
PacketScalar packet_res = mat.template packet<alignment>(alignedStart, alignedStart);
|
||||||
for(int index = packetSize; index < alignedSize; index += packetSize)
|
for(int index = alignedStart + packetSize; index < alignedEnd; index += packetSize)
|
||||||
packet_res = ei_padd(packet_res, mat.template packet<Aligned>(index));
|
packet_res = ei_padd(packet_res, mat.template packet<alignment>(index));
|
||||||
|
|
||||||
res = ei_predux(packet_res);
|
res = ei_predux(packet_res);
|
||||||
}
|
}
|
||||||
@ -202,10 +215,11 @@ struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
|
|||||||
res = Scalar(0);
|
res = Scalar(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int index = alignedSize; index < size; index++)
|
for(int index = alignedEnd; index < size; index++)
|
||||||
{
|
res += mat.coeff(index);
|
||||||
|
|
||||||
|
for(int index = alignedEnd; index < size; index++)
|
||||||
res += mat.coeff(index);
|
res += mat.coeff(index);
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -59,6 +59,16 @@ template<typename ExpressionType> class SwapWrapper
|
|||||||
inline int cols() const { return m_expression.cols(); }
|
inline int cols() const { return m_expression.cols(); }
|
||||||
inline int stride() const { return m_expression.stride(); }
|
inline int stride() const { return m_expression.stride(); }
|
||||||
|
|
||||||
|
inline Scalar& coeffRef(int row, int col)
|
||||||
|
{
|
||||||
|
return m_expression.const_cast_derived().coeffRef(row, col);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline Scalar& coeffRef(int index)
|
||||||
|
{
|
||||||
|
return m_expression.const_cast_derived().coeffRef(index);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename OtherDerived>
|
template<typename OtherDerived>
|
||||||
void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other)
|
void copyCoeff(int row, int col, const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
@ -80,29 +90,29 @@ template<typename ExpressionType> class SwapWrapper
|
|||||||
_other.coeffRef(index) = tmp;
|
_other.coeffRef(index) = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename OtherDerived, int LoadStoreMode>
|
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||||
void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
|
void copyPacket(int row, int col, const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
OtherDerived& _other = other.const_cast_derived();
|
OtherDerived& _other = other.const_cast_derived();
|
||||||
ei_internal_assert(row >= 0 && row < rows()
|
ei_internal_assert(row >= 0 && row < rows()
|
||||||
&& col >= 0 && col < cols());
|
&& col >= 0 && col < cols());
|
||||||
Packet tmp = m_expression.template packet<LoadStoreMode>(row, col);
|
Packet tmp = m_expression.template packet<StoreMode>(row, col);
|
||||||
m_expression.template writePacket<LoadStoreMode>(row, col,
|
m_expression.template writePacket<StoreMode>(row, col,
|
||||||
_other.template packet<LoadStoreMode>(row, col)
|
_other.template packet<LoadMode>(row, col)
|
||||||
);
|
);
|
||||||
_other.template writePacket<LoadStoreMode>(row, col, tmp);
|
_other.template writePacket<LoadMode>(row, col, tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename OtherDerived, int LoadStoreMode>
|
template<typename OtherDerived, int StoreMode, int LoadMode>
|
||||||
void copyPacket(int index, const MatrixBase<OtherDerived>& other)
|
void copyPacket(int index, const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
OtherDerived& _other = other.const_cast_derived();
|
OtherDerived& _other = other.const_cast_derived();
|
||||||
ei_internal_assert(index >= 0 && index < m_expression.size());
|
ei_internal_assert(index >= 0 && index < m_expression.size());
|
||||||
Packet tmp = m_expression.template packet<LoadStoreMode>(index);
|
Packet tmp = m_expression.template packet<StoreMode>(index);
|
||||||
m_expression.template writePacket<LoadStoreMode>(index,
|
m_expression.template writePacket<StoreMode>(index,
|
||||||
_other.template packet<LoadStoreMode>(index)
|
_other.template packet<LoadMode>(index)
|
||||||
);
|
);
|
||||||
_other.template writePacket<LoadStoreMode>(index, tmp);
|
_other.template writePacket<LoadMode>(index, tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -119,42 +119,47 @@ const unsigned int LinearAccessBit = 0x10;
|
|||||||
*/
|
*/
|
||||||
const unsigned int DirectAccessBit = 0x20;
|
const unsigned int DirectAccessBit = 0x20;
|
||||||
|
|
||||||
|
/** \ingroup flags
|
||||||
|
*
|
||||||
|
* means the first coefficient packet is guaranteed to be aligned */
|
||||||
|
const unsigned int AlignedBit = 0x40;
|
||||||
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
*
|
*
|
||||||
* means all diagonal coefficients are equal to 0 */
|
* means all diagonal coefficients are equal to 0 */
|
||||||
const unsigned int ZeroDiagBit = 0x40;
|
const unsigned int ZeroDiagBit = 0x80;
|
||||||
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
*
|
*
|
||||||
* means all diagonal coefficients are equal to 1 */
|
* means all diagonal coefficients are equal to 1 */
|
||||||
const unsigned int UnitDiagBit = 0x80;
|
const unsigned int UnitDiagBit = 0x100;
|
||||||
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
*
|
*
|
||||||
* means the matrix is selfadjoint (M=M*). */
|
* means the matrix is selfadjoint (M=M*). */
|
||||||
const unsigned int SelfAdjointBit = 0x100;
|
const unsigned int SelfAdjointBit = 0x200;
|
||||||
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
*
|
*
|
||||||
* means the strictly lower triangular part is 0 */
|
* means the strictly lower triangular part is 0 */
|
||||||
const unsigned int UpperTriangularBit = 0x200;
|
const unsigned int UpperTriangularBit = 0x400;
|
||||||
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
*
|
*
|
||||||
* means the strictly upper triangular part is 0 */
|
* means the strictly upper triangular part is 0 */
|
||||||
const unsigned int LowerTriangularBit = 0x400;
|
const unsigned int LowerTriangularBit = 0x800;
|
||||||
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
*
|
*
|
||||||
* means the expression includes sparse matrices and the sparse path has to be taken. */
|
* means the expression includes sparse matrices and the sparse path has to be taken. */
|
||||||
const unsigned int SparseBit = 0x800;
|
const unsigned int SparseBit = 0x1000;
|
||||||
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
*
|
*
|
||||||
* currently unused. Means the matrix probably has a very big size.
|
* currently unused. Means the matrix probably has a very big size.
|
||||||
* Could eventually be used as a hint to determine which algorithms
|
* Could eventually be used as a hint to determine which algorithms
|
||||||
* to use. */
|
* to use. */
|
||||||
const unsigned int LargeBit = 0x1000;
|
const unsigned int LargeBit = 0x2000;
|
||||||
|
|
||||||
// list of flags that are inherited by default
|
// list of flags that are inherited by default
|
||||||
const unsigned int HereditaryBits = RowMajorBit
|
const unsigned int HereditaryBits = RowMajorBit
|
||||||
@ -175,15 +180,21 @@ const unsigned int UnitUpper = UpperTriangularBit | UnitDiagBit;
|
|||||||
const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit;
|
const unsigned int UnitLower = LowerTriangularBit | UnitDiagBit;
|
||||||
const unsigned int Diagonal = Upper | Lower;
|
const unsigned int Diagonal = Upper | Lower;
|
||||||
|
|
||||||
enum { Aligned=0, Unaligned=1 };
|
enum { Aligned=0, Unaligned=1, Unknown=2 };
|
||||||
enum { ConditionalJumpCost = 5 };
|
enum { ConditionalJumpCost = 5 };
|
||||||
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
|
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
|
||||||
enum DirectionType { Vertical, Horizontal };
|
enum DirectionType { Vertical, Horizontal };
|
||||||
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct, SparseProduct };
|
enum ProductEvaluationMode { NormalProduct, CacheFriendlyProduct, DiagonalProduct, SparseProduct };
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
/** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment
|
||||||
|
* and good size */
|
||||||
InnerVectorization,
|
InnerVectorization,
|
||||||
|
/** \internal Vectorization path using a single loop plus scalar loops for the
|
||||||
|
* unaligned boundaries */
|
||||||
LinearVectorization,
|
LinearVectorization,
|
||||||
|
/** \internal Generic vectorization path using one vectorized loop per row/column with some
|
||||||
|
* scalar loops to handle the unaligned boundaries */
|
||||||
SliceVectorization,
|
SliceVectorization,
|
||||||
NoVectorization
|
NoVectorization
|
||||||
};
|
};
|
||||||
|
@ -43,8 +43,8 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
|
|||||||
template<typename ExpressionType> class NestByValue;
|
template<typename ExpressionType> class NestByValue;
|
||||||
template<typename ExpressionType> class SwapWrapper;
|
template<typename ExpressionType> class SwapWrapper;
|
||||||
template<typename MatrixType> class Minor;
|
template<typename MatrixType> class Minor;
|
||||||
template<typename MatrixType, int BlockRows=Dynamic, int BlockCols=Dynamic,
|
template<typename MatrixType, int BlockRows=Dynamic, int BlockCols=Dynamic, int PacketAccess=Unaligned,
|
||||||
int DirectAccessStatus = ei_traits<MatrixType>::Flags&DirectAccessBit> class Block;
|
int _DirectAccessStatus = ei_traits<MatrixType>::Flags&DirectAccessBit> class Block;
|
||||||
template<typename MatrixType> class Transpose;
|
template<typename MatrixType> class Transpose;
|
||||||
template<typename MatrixType> class Conjugate;
|
template<typename MatrixType> class Conjugate;
|
||||||
template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
|
template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
|
||||||
@ -53,7 +53,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
|
|||||||
template<typename Lhs, typename Rhs, int ProductMode> class Product;
|
template<typename Lhs, typename Rhs, int ProductMode> class Product;
|
||||||
template<typename CoeffsVectorType> class DiagonalMatrix;
|
template<typename CoeffsVectorType> class DiagonalMatrix;
|
||||||
template<typename MatrixType> class DiagonalCoeffs;
|
template<typename MatrixType> class DiagonalCoeffs;
|
||||||
template<typename MatrixType, int Alignment = Unaligned> class Map;
|
template<typename MatrixType, int PacketAccess = Unaligned> class Map;
|
||||||
template<typename MatrixType, unsigned int Mode> class Part;
|
template<typename MatrixType, unsigned int Mode> class Part;
|
||||||
template<typename MatrixType, unsigned int Mode> class Extract;
|
template<typename MatrixType, unsigned int Mode> class Extract;
|
||||||
template<typename ExpressionType> class Cwise;
|
template<typename ExpressionType> class Cwise;
|
||||||
|
@ -168,12 +168,14 @@ class ei_corrected_matrix_flags
|
|||||||
packet_access_bit
|
packet_access_bit
|
||||||
= ei_packet_traits<Scalar>::size > 1
|
= ei_packet_traits<Scalar>::size > 1
|
||||||
&& (is_big || linear_size%ei_packet_traits<Scalar>::size==0)
|
&& (is_big || linear_size%ei_packet_traits<Scalar>::size==0)
|
||||||
? PacketAccessBit : 0
|
? PacketAccessBit : 0,
|
||||||
|
aligned_bit = packet_access_bit
|
||||||
|
&& (is_big || linear_size%ei_packet_traits<Scalar>::size==0) ? AlignedBit : 0
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
enum { ret = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | PacketAccessBit | RowMajorBit))
|
enum { ret = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit | PacketAccessBit | RowMajorBit))
|
||||||
| LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit
|
| LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -105,8 +105,11 @@ public:
|
|||||||
|
|
||||||
/** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
|
/** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
|
||||||
* its four coefficients \a w, \a x, \a y and \a z.
|
* its four coefficients \a w, \a x, \a y and \a z.
|
||||||
|
*
|
||||||
|
* \warning Note the order of the arguments: the real \a w coefficient first,
|
||||||
|
* while internally the coefficients are stored in the following order:
|
||||||
|
* [\c x, \c y, \c z, \c w]
|
||||||
*/
|
*/
|
||||||
// FIXME what is the preferred order: w,x,y,z or x,y,z,w ?
|
|
||||||
inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z)
|
inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z)
|
||||||
{ m_coeffs << x, y, z, w; }
|
{ m_coeffs << x, y, z, w; }
|
||||||
|
|
||||||
@ -313,8 +316,8 @@ inline Quaternion<Scalar>& Quaternion<Scalar>::setFromTwoVectors(const MatrixBas
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** \returns the multiplicative inverse of \c *this
|
/** \returns the multiplicative inverse of \c *this
|
||||||
* Note that in most cases, i.e., if you simply want the opposite
|
* Note that in most cases, i.e., if you simply want the opposite rotation,
|
||||||
* rotation, it is enough to use the conjugate.
|
* and/or the quaternion is normalized, then it is enough to use the conjugate.
|
||||||
*
|
*
|
||||||
* \sa Quaternion::conjugate()
|
* \sa Quaternion::conjugate()
|
||||||
*/
|
*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user