mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-14 18:33:16 +08:00
* more cleaning in Product
* make Matrix2f (and similar) vectorized using the linear path
* fix a couple of warnings and compilation issues with ICC and gcc 3.3/3.4 (cannot get Transform to compile with gcc 3.3/3.4, see the FIXME)
This commit is contained in:
parent
82c3cea1d5
commit
fb4a151982
@ -351,23 +351,25 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling
|
||||
{
|
||||
inline static void run(Derived1 &dst, const Derived2 &src)
|
||||
{
|
||||
const int size = Derived1::SizeAtCompileTime;
|
||||
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||
const int alignedSize = (size/packetSize)*packetSize;
|
||||
const bool rowMajor = Derived1::Flags&RowMajorBit;
|
||||
const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
|
||||
const int outerSize = rowMajor ? Derived1::RowsAtCompileTime : Derived1::ColsAtCompileTime;
|
||||
int index = 0;
|
||||
enum {
|
||||
size = Derived1::SizeAtCompileTime,
|
||||
packetSize = ei_packet_traits<typename Derived1::Scalar>::size,
|
||||
alignedSize = (int(size)/int(packetSize))*int(packetSize),
|
||||
rowMajor = int(Derived1::Flags)&RowMajorBit,
|
||||
innerSize = int(rowMajor) ? int(Derived1::ColsAtCompileTime) : int(Derived1::RowsAtCompileTime),
|
||||
outerSize = int(rowMajor) ? int(Derived1::RowsAtCompileTime) : int(Derived1::ColsAtCompileTime)
|
||||
};
|
||||
|
||||
// do the vectorizable part of the assignment
|
||||
ei_assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
|
||||
|
||||
// now we must do the rest without vectorization.
|
||||
const int k = alignedSize/innerSize;
|
||||
const int i = alignedSize%innerSize;
|
||||
|
||||
enum {
|
||||
k = int(alignedSize)/int(innerSize),
|
||||
i = int(alignedSize)%int(innerSize)
|
||||
};
|
||||
// do the remainder of the current row or col
|
||||
ei_assign_novec_InnerUnrolling<Derived1, Derived2, i, innerSize>::run(dst, src, k);
|
||||
ei_assign_novec_InnerUnrolling<Derived1, Derived2, i, int(k)<int(outerSize) ? int(innerSize) : 0>::run(dst, src, k);
|
||||
|
||||
// do the remaining rows or cols
|
||||
for(int j = k+1; j < outerSize; j++)
|
||||
|
@ -101,8 +101,8 @@ template<typename MatrixType> class DiagonalCoeffs
|
||||
*
|
||||
* \sa class DiagonalCoeffs */
|
||||
template<typename Derived>
|
||||
DiagonalCoeffs<Derived>
|
||||
inline MatrixBase<Derived>::diagonal()
|
||||
inline DiagonalCoeffs<Derived>
|
||||
MatrixBase<Derived>::diagonal()
|
||||
{
|
||||
return DiagonalCoeffs<Derived>(derived());
|
||||
}
|
||||
|
@ -201,14 +201,21 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
|
||||
ei_assert(lhs.cols() == rhs.rows());
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename DestDerived>
|
||||
void _cacheFriendlyEval(DestDerived& res) const;
|
||||
|
||||
/** \internal */
|
||||
/** \internal
|
||||
* compute \a res += \c *this using the cache friendly product.
|
||||
*/
|
||||
template<typename DestDerived>
|
||||
void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
|
||||
|
||||
/** \internal
|
||||
* \returns whether it is worth it to use the cache friendly product.
|
||||
*/
|
||||
inline bool _useCacheFriendlyProduct() const {
|
||||
return _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
inline int _rows() const { return m_lhs.rows(); }
|
||||
@ -229,7 +236,7 @@ template<typename LhsNested, typename RhsNested, int ProductMode> class Product
|
||||
return res;
|
||||
}
|
||||
|
||||
template<typename Lhs_, typename Rhs_, int EvalMode_, typename DestDerived_, bool DirectAccess_>
|
||||
template<typename Lhs_, typename Rhs_, int ProductMode_, typename DestDerived_, bool DirectAccess_>
|
||||
friend struct ei_cache_friendly_selector;
|
||||
|
||||
protected:
|
||||
@ -419,7 +426,10 @@ template<typename Lhs,typename Rhs>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
|
||||
{
|
||||
other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
|
||||
if (other._expression()._useCacheFriendlyProduct())
|
||||
other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
|
||||
else
|
||||
lazyAssign(derived() + other._expression());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@ -427,7 +437,15 @@ template<typename Derived>
|
||||
template<typename Lhs, typename Rhs>
|
||||
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
|
||||
{
|
||||
product._cacheFriendlyEval(derived());
|
||||
if (product._useCacheFriendlyProduct())
|
||||
{
|
||||
setZero();
|
||||
product._cacheFriendlyEvalAndAdd(derived());
|
||||
}
|
||||
else
|
||||
{
|
||||
lazyAssign<Product<Lhs,Rhs,CacheFriendlyProduct> >(product);
|
||||
}
|
||||
return derived();
|
||||
}
|
||||
|
||||
@ -472,61 +490,22 @@ template<typename T> struct ei_product_copy_lhs
|
||||
>::ret type;
|
||||
};
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename Lhs, typename Rhs, int ProductMode>
|
||||
template<typename DestDerived>
|
||||
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
|
||||
inline void Product<Lhs,Rhs,ProductMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
|
||||
{
|
||||
if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
)
|
||||
{
|
||||
res.setZero();
|
||||
typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy;
|
||||
typedef typename ei_unref<LhsCopy>::type _LhsCopy;
|
||||
typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy;
|
||||
typedef typename ei_unref<RhsCopy>::type _RhsCopy;
|
||||
LhsCopy lhs(m_lhs);
|
||||
RhsCopy rhs(m_rhs);
|
||||
ei_cache_friendly_product<Scalar>(
|
||||
_rows(), _cols(), lhs.cols(),
|
||||
_LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(),
|
||||
_RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(),
|
||||
Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy();
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived>
|
||||
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
|
||||
{
|
||||
if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
)
|
||||
{
|
||||
typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy;
|
||||
typedef typename ei_unref<LhsCopy>::type _LhsCopy;
|
||||
typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy;
|
||||
typedef typename ei_unref<RhsCopy>::type _RhsCopy;
|
||||
LhsCopy lhs(m_lhs);
|
||||
RhsCopy rhs(m_rhs);
|
||||
ei_cache_friendly_product<Scalar>(
|
||||
_rows(), _cols(), lhs.cols(),
|
||||
_LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(),
|
||||
_RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(),
|
||||
Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
res += Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy();
|
||||
}
|
||||
typedef typename ei_product_copy_lhs<_LhsNested>::type LhsCopy;
|
||||
typedef typename ei_unref<LhsCopy>::type _LhsCopy;
|
||||
typedef typename ei_product_copy_rhs<_RhsNested>::type RhsCopy;
|
||||
typedef typename ei_unref<RhsCopy>::type _RhsCopy;
|
||||
LhsCopy lhs(m_lhs);
|
||||
RhsCopy rhs(m_rhs);
|
||||
ei_cache_friendly_product<Scalar>(
|
||||
_rows(), _cols(), lhs.cols(),
|
||||
_LhsCopy::Flags&RowMajorBit, &(lhs.const_cast_derived().coeffRef(0,0)), lhs.stride(),
|
||||
_RhsCopy::Flags&RowMajorBit, &(rhs.const_cast_derived().coeffRef(0,0)), rhs.stride(),
|
||||
Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
|
@ -101,8 +101,8 @@ MatrixBase<Derived>::redux(const BinaryOp& func) const
|
||||
* \sa trace()
|
||||
*/
|
||||
template<typename Derived>
|
||||
typename ei_traits<Derived>::Scalar
|
||||
inline MatrixBase<Derived>::sum() const
|
||||
inline typename ei_traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::sum() const
|
||||
{
|
||||
return this->redux(Eigen::ei_scalar_sum_op<Scalar>());
|
||||
}
|
||||
@ -114,8 +114,8 @@ inline MatrixBase<Derived>::sum() const
|
||||
* \sa diagonal(), sum()
|
||||
*/
|
||||
template<typename Derived>
|
||||
typename ei_traits<Derived>::Scalar
|
||||
inline MatrixBase<Derived>::trace() const
|
||||
inline typename ei_traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::trace() const
|
||||
{
|
||||
return diagonal().sum();
|
||||
}
|
||||
@ -123,8 +123,8 @@ inline MatrixBase<Derived>::trace() const
|
||||
/** \returns the minimum of all coefficients of *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
typename ei_traits<Derived>::Scalar
|
||||
inline MatrixBase<Derived>::minCoeff() const
|
||||
inline typename ei_traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::minCoeff() const
|
||||
{
|
||||
return this->redux(Eigen::ei_scalar_min_op<Scalar>());
|
||||
}
|
||||
@ -132,8 +132,8 @@ inline MatrixBase<Derived>::minCoeff() const
|
||||
/** \returns the maximum of all coefficients of *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
typename ei_traits<Derived>::Scalar
|
||||
inline MatrixBase<Derived>::maxCoeff() const
|
||||
inline typename ei_traits<Derived>::Scalar
|
||||
MatrixBase<Derived>::maxCoeff() const
|
||||
{
|
||||
return this->redux(Eigen::ei_scalar_max_op<Scalar>());
|
||||
}
|
||||
|
@ -156,10 +156,10 @@ class ei_corrected_matrix_flags
|
||||
? SuggestedFlags&RowMajorBit
|
||||
: Cols > 1 ? RowMajorBit : 0,
|
||||
is_big = MaxRows == Dynamic || MaxCols == Dynamic,
|
||||
inner_size = row_major_bit ? Cols : Rows,
|
||||
linear_size = Cols * Rows,
|
||||
packet_access_bit
|
||||
= ei_packet_traits<Scalar>::size > 1
|
||||
&& (is_big || inner_size%ei_packet_traits<Scalar>::size==0)
|
||||
&& (is_big || linear_size%ei_packet_traits<Scalar>::size==0)
|
||||
? PacketAccessBit : 0
|
||||
};
|
||||
|
||||
|
@ -28,8 +28,8 @@
|
||||
/** \returns the cross product of \c *this and \a other */
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
typename ei_eval<Derived>::type
|
||||
inline MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
|
||||
inline typename ei_eval<Derived>::type
|
||||
MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
|
||||
{
|
||||
// Note that there is no need for an expression here since the compiler
|
||||
// optimizes such a small temporary very well (even within a complex expression)
|
||||
|
@ -62,6 +62,47 @@ protected:
|
||||
int OtherCols=Other::ColsAtCompileTime>
|
||||
struct ei_transform_product_impl;
|
||||
|
||||
// FIXME these specializations of ei_transform_product_impl do not work with gcc 3.3 and 3.4 because
|
||||
// Dim depends on a template parameter. Replacing Dim by 3 (for the 3D case) works.
|
||||
|
||||
// note that these specializations have to be defined here,
|
||||
// otherwise some compilers (at least ICC and NVCC) complain about
|
||||
// the use of Dim in the specialization parameters.
|
||||
template<typename Other>
|
||||
struct ei_transform_product_impl<Other,Dim+1,Dim+1>
|
||||
{
|
||||
typedef typename Transform<Scalar,Dim>::MatrixType MatrixType;
|
||||
typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
|
||||
static ResultType run(const Transform<Scalar,Dim>& tr, const Other& other)
|
||||
{ return tr.matrix() * other; }
|
||||
};
|
||||
|
||||
template<typename Other>
|
||||
struct ei_transform_product_impl<Other,Dim+1,1>
|
||||
{
|
||||
typedef typename Transform<Scalar,Dim>::MatrixType MatrixType;
|
||||
typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
|
||||
static ResultType run(const Transform<Scalar,Dim>& tr, const Other& other)
|
||||
{ return tr.matrix() * other; }
|
||||
};
|
||||
|
||||
template<typename Other>
|
||||
struct ei_transform_product_impl<Other,Dim,1>
|
||||
{
|
||||
typedef typename Transform<Scalar,Dim>::AffineMatrixRef MatrixType;
|
||||
typedef const CwiseUnaryOp<
|
||||
ei_scalar_multiple_op<Scalar>,
|
||||
NestByValue<CwiseBinaryOp<
|
||||
ei_scalar_sum_op<Scalar>,
|
||||
NestByValue<typename ProductReturnType<NestByValue<MatrixType>,Other>::Type >,
|
||||
NestByValue<typename Transform<Scalar,Dim>::VectorRef> > >
|
||||
> ResultType;
|
||||
// FIXME shall we offer an optimized version when the last row is known to be 0,0...,0,1 ?
|
||||
static ResultType run(const Transform<Scalar,Dim>& tr, const Other& other)
|
||||
{ return ((tr.affine().nestByValue() * other).nestByValue() + tr.translation().nestByValue()).nestByValue()
|
||||
* (Scalar(1) / ( (tr.matrix().template block<1,Dim>(Dim,0) * other).coeff(0) + tr.matrix().coeff(Dim,Dim))); }
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/** Default constructor without initialization of the coefficients. */
|
||||
@ -103,13 +144,7 @@ public:
|
||||
inline VectorRef translation() { return m_matrix.template block<Dim,1>(0,Dim); }
|
||||
|
||||
template<typename OtherDerived>
|
||||
struct TransformProductReturnType
|
||||
{
|
||||
typedef typename ei_transform_product_impl<OtherDerived>::ResultType Type;
|
||||
};
|
||||
|
||||
template<typename OtherDerived>
|
||||
const typename TransformProductReturnType<OtherDerived>::Type
|
||||
const typename ei_transform_product_impl<OtherDerived>::ResultType
|
||||
operator * (const MatrixBase<OtherDerived> &other) const;
|
||||
|
||||
/** Concatenates two transformations */
|
||||
@ -192,7 +227,7 @@ QMatrix Transform<Scalar,Dim>::toQMatrix(void) const
|
||||
|
||||
template<typename Scalar, int Dim>
|
||||
template<typename OtherDerived>
|
||||
const typename Transform<Scalar,Dim>::template TransformProductReturnType<OtherDerived>::Type
|
||||
const typename Transform<Scalar,Dim>::template ei_transform_product_impl<OtherDerived>::ResultType
|
||||
Transform<Scalar,Dim>::operator*(const MatrixBase<OtherDerived> &other) const
|
||||
{
|
||||
return ei_transform_product_impl<OtherDerived>::run(*this,other.derived());
|
||||
@ -373,44 +408,4 @@ Transform<Scalar,Dim>::fromPositionOrientationScale(const MatrixBase<PositionDer
|
||||
return *this;
|
||||
}
|
||||
|
||||
//----------
|
||||
|
||||
template<typename Scalar, int Dim>
|
||||
template<typename Other>
|
||||
struct Transform<Scalar,Dim>::ei_transform_product_impl<Other,Dim+1,Dim+1>
|
||||
{
|
||||
typedef typename Transform<Scalar,Dim>::MatrixType MatrixType;
|
||||
typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
|
||||
static ResultType run(const Transform<Scalar,Dim>& tr, const Other& other)
|
||||
{ return tr.matrix() * other; }
|
||||
};
|
||||
|
||||
template<typename Scalar, int Dim>
|
||||
template<typename Other>
|
||||
struct Transform<Scalar,Dim>::ei_transform_product_impl<Other,Dim+1,1>
|
||||
{
|
||||
typedef typename Transform<Scalar,Dim>::MatrixType MatrixType;
|
||||
typedef typename ProductReturnType<MatrixType,Other>::Type ResultType;
|
||||
static ResultType run(const Transform<Scalar,Dim>& tr, const Other& other)
|
||||
{ return tr.matrix() * other; }
|
||||
};
|
||||
|
||||
template<typename Scalar, int Dim>
|
||||
template<typename Other>
|
||||
struct Transform<Scalar,Dim>::ei_transform_product_impl<Other,Dim,1>
|
||||
{
|
||||
typedef typename Transform<Scalar,Dim>::AffineMatrixRef MatrixType;
|
||||
typedef const CwiseUnaryOp<
|
||||
ei_scalar_multiple_op<Scalar>,
|
||||
NestByValue<CwiseBinaryOp<
|
||||
ei_scalar_sum_op<Scalar>,
|
||||
NestByValue<typename ProductReturnType<NestByValue<MatrixType>,Other>::Type >,
|
||||
NestByValue<typename Transform<Scalar,Dim>::VectorRef> > >
|
||||
> ResultType;
|
||||
// FIXME shall we offer an optimized version when the last row is known to be 0,0...,0,1 ?
|
||||
static ResultType run(const Transform<Scalar,Dim>& tr, const Other& other)
|
||||
{ return ((tr.affine().nestByValue() * other).nestByValue() + tr.translation().nestByValue()).nestByValue()
|
||||
* (Scalar(1) / ( (tr.matrix().template block<1,Dim>(Dim,0) * other).coeff(0) + tr.matrix().coeff(Dim,Dim))); }
|
||||
};
|
||||
|
||||
#endif // EIGEN_TRANSFORM_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user