add EIGEN_DEVICE_FUNC to methods

This commit is contained in:
acxz 2020-12-01 03:08:47 +00:00 committed by Rasmus Munk Larsen
parent 89f90b585d
commit 7b80609d49
4 changed files with 88 additions and 86 deletions

View File

@ -53,7 +53,7 @@ namespace internal {
* decomposition to determine whether a system of equations has a solution. * decomposition to determine whether a system of equations has a solution.
* *
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
* *
* \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT
*/ */
template<typename _MatrixType, int _UpLo> class LDLT template<typename _MatrixType, int _UpLo> class LDLT
@ -246,8 +246,8 @@ template<typename _MatrixType, int _UpLo> class LDLT
*/ */
const LDLT& adjoint() const { return *this; }; const LDLT& adjoint() const { return *this; };
inline Index rows() const { return m_matrix.rows(); } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); } EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
/** \brief Reports whether previous computation was successful. /** \brief Reports whether previous computation was successful.
* *

View File

@ -14,7 +14,7 @@
#define EIGEN_PRODUCTEVALUATORS_H #define EIGEN_PRODUCTEVALUATORS_H
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
/** \internal /** \internal
@ -22,19 +22,19 @@ namespace internal {
* Since products require special treatments to handle all possible cases, * Since products require special treatments to handle all possible cases,
* we simply defer the evaluation logic to a product_evaluator class * we simply defer the evaluation logic to a product_evaluator class
* which offers more partial specialization possibilities. * which offers more partial specialization possibilities.
* *
* \sa class product_evaluator * \sa class product_evaluator
*/ */
template<typename Lhs, typename Rhs, int Options> template<typename Lhs, typename Rhs, int Options>
struct evaluator<Product<Lhs, Rhs, Options> > struct evaluator<Product<Lhs, Rhs, Options> >
: public product_evaluator<Product<Lhs, Rhs, Options> > : public product_evaluator<Product<Lhs, Rhs, Options> >
{ {
typedef Product<Lhs, Rhs, Options> XprType; typedef Product<Lhs, Rhs, Options> XprType;
typedef product_evaluator<XprType> Base; typedef product_evaluator<XprType> Base;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
}; };
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
// TODO we should apply that rule only if that's really helpful // TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1> template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
@ -62,12 +62,12 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
template<typename Lhs, typename Rhs, int DiagIndex> template<typename Lhs, typename Rhs, int DiagIndex>
struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> > struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
: public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
{ {
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType; typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base; typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>( : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
@ -108,23 +108,23 @@ struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsSh
: m_result(xpr.rows(), xpr.cols()) : m_result(xpr.rows(), xpr.cols())
{ {
::new (static_cast<Base*>(this)) Base(m_result); ::new (static_cast<Base*>(this)) Base(m_result);
// FIXME shall we handle nested_eval here?, // FIXME shall we handle nested_eval here?,
// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.) // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; // typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; // typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; // typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; // typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
// //
// const LhsNested lhs(xpr.lhs()); // const LhsNested lhs(xpr.lhs());
// const RhsNested rhs(xpr.rhs()); // const RhsNested rhs(xpr.rhs());
// //
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs); // generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs()); generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
} }
protected: protected:
PlainObject m_result; PlainObject m_result;
}; };
@ -250,7 +250,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{ {
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
} }
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
@ -298,7 +298,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
{ {
template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
@ -310,31 +310,31 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
dst.const_cast_derived() += m_scale * src; dst.const_cast_derived() += m_scale * src;
} }
}; };
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>()); internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
} }
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>()); internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
} }
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>()); internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
} }
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ {
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>()); internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
} }
}; };
@ -343,7 +343,7 @@ template<typename Lhs, typename Rhs, typename Derived>
struct generic_product_impl_base struct generic_product_impl_base
{ {
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
@ -355,7 +355,7 @@ struct generic_product_impl_base
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
@ -379,7 +379,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
if (lhs.rows() == 1 && rhs.cols() == 1) { if (lhs.rows() == 1 && rhs.cols() == 1) {
dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0)); dst.coeffRef(0,0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0));
return; return;
} }
LhsNested actual_lhs(lhs); LhsNested actual_lhs(lhs);
RhsNested actual_rhs(rhs); RhsNested actual_rhs(rhs);
internal::gemv_dense_selector<Side, internal::gemv_dense_selector<Side,
@ -390,10 +390,10 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
}; };
template<typename Lhs, typename Rhs> template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
{ {
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
@ -408,7 +408,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
// dst.noalias() += lhs.lazyProduct(rhs); // dst.noalias() += lhs.lazyProduct(rhs);
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>()); call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
} }
template<typename Dst> template<typename Dst>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
@ -525,7 +525,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested; typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested; typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned; typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned; typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
@ -544,7 +544,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType; typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
enum { enum {
LhsCoeffReadCost = LhsEtorType::CoeffReadCost, LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
@ -553,10 +553,10 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost, + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
LhsFlags = LhsEtorType::Flags, LhsFlags = LhsEtorType::Flags,
RhsFlags = RhsEtorType::Flags, RhsFlags = RhsEtorType::Flags,
LhsRowMajor = LhsFlags & RowMajorBit, LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit,
@ -566,7 +566,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
// Here, we don't care about alignment larger than the usable packet size. // Here, we don't care about alignment larger than the usable packet size.
LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value, SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
@ -581,7 +581,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
// TODO enable vectorization for mixed types // TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
| (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
@ -600,7 +600,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
&& (LhsFlags & RhsFlags & ActualPacketAccessBit) && (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (InnerSize % packet_traits<Scalar>::size == 0) && (InnerSize % packet_traits<Scalar>::size == 0)
}; };
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
{ {
return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
@ -642,7 +642,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
protected: protected:
typename internal::add_const_on_value_type<LhsNested>::type m_lhs; typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
typename internal::add_const_on_value_type<RhsNested>::type m_rhs; typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
LhsEtorType m_lhsImpl; LhsEtorType m_lhsImpl;
RhsEtorType m_rhsImpl; RhsEtorType m_rhsImpl;
@ -673,7 +673,7 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProduc
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{ {
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res); res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
@ -683,7 +683,7 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode> template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode> struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{ {
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res); etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res); res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
@ -693,7 +693,7 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
template<typename Lhs, typename Rhs, typename Packet, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{ {
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col)); res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
} }
@ -702,7 +702,7 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{ {
res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col))); res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
} }
@ -711,7 +711,7 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{ {
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
} }
@ -720,7 +720,7 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
{ {
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
} }
@ -729,7 +729,7 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{ {
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
for(Index i = 0; i < innerDim; ++i) for(Index i = 0; i < innerDim; ++i)
@ -740,7 +740,7 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
template<typename Lhs, typename Rhs, typename Packet, int LoadMode> template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{ {
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{ {
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0)); res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
for(Index i = 0; i < innerDim; ++i) for(Index i = 0; i < innerDim; ++i)
@ -762,7 +762,7 @@ struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> > : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
{ {
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest> template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ {
@ -776,7 +776,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> > : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
{ {
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest> template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ {
@ -797,7 +797,7 @@ struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> > : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
{ {
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest> template<typename Dest>
static EIGEN_DEVICE_FUNC static EIGEN_DEVICE_FUNC
void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
@ -811,7 +811,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> > : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
{ {
typedef typename Product<Lhs,Rhs>::Scalar Scalar; typedef typename Product<Lhs,Rhs>::Scalar Scalar;
template<typename Dest> template<typename Dest>
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ {
@ -823,7 +823,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
/*************************************************************************** /***************************************************************************
* Diagonal products * Diagonal products
***************************************************************************/ ***************************************************************************/
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder> template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
struct diagonal_product_evaluator_base struct diagonal_product_evaluator_base
: evaluator_base<Derived> : evaluator_base<Derived>
@ -832,10 +832,10 @@ struct diagonal_product_evaluator_base
public: public:
enum { enum {
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost, CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
MatrixFlags = evaluator<MatrixType>::Flags, MatrixFlags = evaluator<MatrixType>::Flags,
DiagFlags = evaluator<DiagonalType>::Flags, DiagFlags = evaluator<DiagonalType>::Flags,
_StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor _StorageOrder = (Derived::MaxRowsAtCompileTime==1 && Derived::MaxColsAtCompileTime!=1) ? RowMajor
: (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor : (Derived::MaxColsAtCompileTime==1 && Derived::MaxRowsAtCompileTime!=1) ? ColMajor
: MatrixFlags & RowMajorBit ? RowMajor : ColMajor, : MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
@ -858,14 +858,14 @@ public:
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft) || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight) || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
}; };
EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
: m_diagImpl(diag), m_matImpl(mat) : m_diagImpl(diag), m_matImpl(mat)
{ {
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost); EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
{ {
if(AsScalarProduct) if(AsScalarProduct)
@ -873,7 +873,7 @@ public:
else else
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
} }
protected: protected:
template<int LoadMode,typename PacketType> template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
@ -881,7 +881,7 @@ protected:
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col), return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
internal::pset1<PacketType>(m_diagImpl.coeff(id))); internal::pset1<PacketType>(m_diagImpl.coeff(id)));
} }
template<int LoadMode,typename PacketType> template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
{ {
@ -892,7 +892,7 @@ protected:
return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col), return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id)); m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
} }
evaluator<DiagonalType> m_diagImpl; evaluator<DiagonalType> m_diagImpl;
evaluator<MatrixType> m_matImpl; evaluator<MatrixType> m_matImpl;
}; };
@ -907,24 +907,24 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
using Base::m_matImpl; using Base::m_matImpl;
using Base::coeff; using Base::coeff;
typedef typename Base::Scalar Scalar; typedef typename Base::Scalar Scalar;
typedef Product<Lhs, Rhs, ProductKind> XprType; typedef Product<Lhs, Rhs, ProductKind> XprType;
typedef typename XprType::PlainObject PlainObject; typedef typename XprType::PlainObject PlainObject;
typedef typename Lhs::DiagonalVectorType DiagonalType; typedef typename Lhs::DiagonalVectorType DiagonalType;
enum { StorageOrder = Base::_StorageOrder }; enum { StorageOrder = Base::_StorageOrder };
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
: Base(xpr.rhs(), xpr.lhs().diagonal()) : Base(xpr.rhs(), xpr.lhs().diagonal())
{ {
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
{ {
return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
} }
#ifndef EIGEN_GPUCC #ifndef EIGEN_GPUCC
template<int LoadMode,typename PacketType> template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
@ -934,7 +934,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
return this->template packet_impl<LoadMode,PacketType>(row,col, row, return this->template packet_impl<LoadMode,PacketType>(row,col, row,
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type()); typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
} }
template<int LoadMode,typename PacketType> template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index idx) const EIGEN_STRONG_INLINE PacketType packet(Index idx) const
{ {
@ -953,22 +953,22 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
using Base::m_matImpl; using Base::m_matImpl;
using Base::coeff; using Base::coeff;
typedef typename Base::Scalar Scalar; typedef typename Base::Scalar Scalar;
typedef Product<Lhs, Rhs, ProductKind> XprType; typedef Product<Lhs, Rhs, ProductKind> XprType;
typedef typename XprType::PlainObject PlainObject; typedef typename XprType::PlainObject PlainObject;
enum { StorageOrder = Base::_StorageOrder }; enum { StorageOrder = Base::_StorageOrder };
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
: Base(xpr.lhs(), xpr.rhs().diagonal()) : Base(xpr.lhs(), xpr.rhs().diagonal())
{ {
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
{ {
return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
} }
#ifndef EIGEN_GPUCC #ifndef EIGEN_GPUCC
template<int LoadMode,typename PacketType> template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
@ -976,7 +976,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
return this->template packet_impl<LoadMode,PacketType>(row,col, col, return this->template packet_impl<LoadMode,PacketType>(row,col, col,
typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type()); typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
} }
template<int LoadMode,typename PacketType> template<int LoadMode,typename PacketType>
EIGEN_STRONG_INLINE PacketType packet(Index idx) const EIGEN_STRONG_INLINE PacketType packet(Index idx) const
{ {
@ -1004,7 +1004,7 @@ struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned; typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
template<typename Dest, typename PermutationType> template<typename Dest, typename PermutationType>
static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
{ {
MatrixType mat(xpr); MatrixType mat(xpr);
const Index n = Side==OnTheLeft ? mat.rows() : mat.cols(); const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
@ -1058,7 +1058,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag> struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
{ {
template<typename Dest> template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{ {
permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs); permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
} }
@ -1068,7 +1068,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag> struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
{ {
template<typename Dest> template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{ {
permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs); permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
} }
@ -1078,7 +1078,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag> struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
{ {
template<typename Dest> template<typename Dest>
static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
{ {
permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs); permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
} }
@ -1088,7 +1088,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag> struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
{ {
template<typename Dest> template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
{ {
permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs); permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
} }
@ -1110,9 +1110,9 @@ struct transposition_matrix_product
{ {
typedef typename nested_eval<ExpressionType, 1>::type MatrixType; typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
typedef typename remove_all<MatrixType>::type MatrixTypeCleaned; typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
template<typename Dest, typename TranspositionType> template<typename Dest, typename TranspositionType>
static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
{ {
MatrixType mat(xpr); MatrixType mat(xpr);
typedef typename TranspositionType::StorageIndex StorageIndex; typedef typename TranspositionType::StorageIndex StorageIndex;
@ -1135,7 +1135,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag> struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{ {
template<typename Dest> template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{ {
transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs); transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
} }
@ -1145,7 +1145,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag> struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
{ {
template<typename Dest> template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
{ {
transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs); transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
} }
@ -1156,7 +1156,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag> struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
{ {
template<typename Dest> template<typename Dest>
static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
{ {
transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs); transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
} }
@ -1166,7 +1166,7 @@ template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag> struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
{ {
template<typename Dest> template<typename Dest>
static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs) static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
{ {
transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs); transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
} }

View File

@ -10,7 +10,7 @@
#ifndef EIGEN_SOLVETRIANGULAR_H #ifndef EIGEN_SOLVETRIANGULAR_H
#define EIGEN_SOLVETRIANGULAR_H #define EIGEN_SOLVETRIANGULAR_H
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
@ -54,7 +54,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
typedef blas_traits<Lhs> LhsProductTraits; typedef blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::ExtractType ActualLhsType; typedef typename LhsProductTraits::ExtractType ActualLhsType;
typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs; typedef Map<Matrix<RhsScalar,Dynamic,1>, Aligned> MappedRhs;
static void run(const Lhs& lhs, Rhs& rhs) static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
{ {
ActualLhsType actualLhs = LhsProductTraits::extract(lhs); ActualLhsType actualLhs = LhsProductTraits::extract(lhs);
@ -64,7 +64,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(), ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
(useRhsDirectly ? rhs.data() : 0)); (useRhsDirectly ? rhs.data() : 0));
if(!useRhsDirectly) if(!useRhsDirectly)
MappedRhs(actualRhs,rhs.size()) = rhs; MappedRhs(actualRhs,rhs.size()) = rhs;
@ -85,7 +85,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
typedef blas_traits<Lhs> LhsProductTraits; typedef blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType; typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
static void run(const Lhs& lhs, Rhs& rhs) static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
{ {
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs); typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsProductTraits::extract(lhs);
@ -118,7 +118,7 @@ struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1, DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1,
StartIndex = IsLower ? 0 : DiagIndex+1 StartIndex = IsLower ? 0 : DiagIndex+1
}; };
static void run(const Lhs& lhs, Rhs& rhs) static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
{ {
if (LoopIndex>0) if (LoopIndex>0)
rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose() rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
@ -133,22 +133,22 @@ struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size> template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> { struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
static void run(const Lhs&, Rhs&) {} static EIGEN_DEVICE_FUNC void run(const Lhs&, Rhs&) {}
}; };
template<typename Lhs, typename Rhs, int Mode> template<typename Lhs, typename Rhs, int Mode>
struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> { struct triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,CompleteUnrolling,1> {
static void run(const Lhs& lhs, Rhs& rhs) static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
{ triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); } { triangular_solver_unroller<Lhs,Rhs,Mode,0,Rhs::SizeAtCompileTime>::run(lhs,rhs); }
}; };
template<typename Lhs, typename Rhs, int Mode> template<typename Lhs, typename Rhs, int Mode>
struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> { struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
static void run(const Lhs& lhs, Rhs& rhs) static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs)
{ {
Transpose<const Lhs> trLhs(lhs); Transpose<const Lhs> trLhs(lhs);
Transpose<Rhs> trRhs(rhs); Transpose<Rhs> trRhs(rhs);
triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>, triangular_solver_unroller<Transpose<const Lhs>,Transpose<Rhs>,
((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag), ((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs); 0,Rhs::SizeAtCompileTime>::run(trLhs,trRhs);

View File

@ -47,6 +47,7 @@ class TranspositionsBase
Index cols() const { return indices().size(); } Index cols() const { return indices().size(); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
EIGEN_DEVICE_FUNC
inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); } inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); } inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); }
@ -373,6 +374,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived()); return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
} }
EIGEN_DEVICE_FUNC
const TranspositionType& nestedExpression() const { return m_transpositions; } const TranspositionType& nestedExpression() const { return m_transpositions; }
protected: protected: