mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Adding EIGEN_DEVICE_FUNC to Products, especially the Dense2Dense Assignment
specializations. Otherwise, small fixed-size matrix multiplication breaks (a call to 0x00 in call_assignment_no_alias in debug mode, or a trap in release mode, with CUDA 9.1).
This commit is contained in:
parent
d2b0a4a59b
commit
b2053990d0
@ -137,7 +137,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
|
|||||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
||||||
{
|
{
|
||||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||||
static EIGEN_STRONG_INLINE
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
|
||||||
{
|
{
|
||||||
Index dstRows = src.rows();
|
Index dstRows = src.rows();
|
||||||
@ -155,7 +155,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
|
|||||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
||||||
{
|
{
|
||||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||||
static EIGEN_STRONG_INLINE
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
|
||||||
{
|
{
|
||||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||||
@ -170,7 +170,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
|
|||||||
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
|
||||||
{
|
{
|
||||||
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
typedef Product<Lhs,Rhs,Options> SrcXprType;
|
||||||
static EIGEN_STRONG_INLINE
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
|
||||||
{
|
{
|
||||||
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
|
||||||
@ -190,7 +190,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
|
|||||||
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
|
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
|
||||||
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
|
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
|
||||||
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
|
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
|
||||||
static EIGEN_STRONG_INLINE
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
|
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
|
||||||
{
|
{
|
||||||
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
|
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
|
||||||
@ -217,7 +217,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
|
|||||||
struct assignment_from_xpr_op_product
|
struct assignment_from_xpr_op_product
|
||||||
{
|
{
|
||||||
template<typename SrcXprType, typename InitialFunc>
|
template<typename SrcXprType, typename InitialFunc>
|
||||||
static EIGEN_STRONG_INLINE
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
|
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
|
||||||
{
|
{
|
||||||
call_assignment_no_alias(dst, src.lhs(), Func1());
|
call_assignment_no_alias(dst, src.lhs(), Func1());
|
||||||
@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
|
|||||||
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
||||||
{
|
{
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
|
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
|
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -269,7 +269,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
|
|||||||
|
|
||||||
// Column major result
|
// Column major result
|
||||||
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
||||||
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
|
||||||
{
|
{
|
||||||
evaluator<Rhs> rhsEval(rhs);
|
evaluator<Rhs> rhsEval(rhs);
|
||||||
typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
|
typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
|
||||||
@ -282,7 +282,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
|
|||||||
|
|
||||||
// Row major result
|
// Row major result
|
||||||
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
template<typename Dst, typename Lhs, typename Rhs, typename Func>
|
||||||
void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
|
||||||
{
|
{
|
||||||
evaluator<Lhs> lhsEval(lhs);
|
evaluator<Lhs> lhsEval(lhs);
|
||||||
typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
|
typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
|
||||||
@ -300,37 +300,37 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
|
|||||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||||
|
|
||||||
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
|
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
|
||||||
struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
|
struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
|
||||||
struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
|
struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
|
||||||
struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
|
struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
|
||||||
struct adds {
|
struct adds {
|
||||||
Scalar m_scale;
|
Scalar m_scale;
|
||||||
explicit adds(const Scalar& s) : m_scale(s) {}
|
explicit adds(const Scalar& s) : m_scale(s) {}
|
||||||
template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
|
template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
|
||||||
dst.const_cast_derived() += m_scale * src;
|
dst.const_cast_derived() += m_scale * src;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
|
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
|
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
|
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||||
{
|
{
|
||||||
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
|
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
|
||||||
}
|
}
|
||||||
@ -345,15 +345,15 @@ struct generic_product_impl_base
|
|||||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
|
{ dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{ scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
|
{ scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
|
{ scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
@ -373,7 +373,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
|
|||||||
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;
|
||||||
|
|
||||||
template<typename Dest>
|
template<typename Dest>
|
||||||
static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
|
||||||
{
|
{
|
||||||
LhsNested actual_lhs(lhs);
|
LhsNested actual_lhs(lhs);
|
||||||
RhsNested actual_rhs(rhs);
|
RhsNested actual_rhs(rhs);
|
||||||
@ -390,7 +390,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|||||||
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
|
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
|
||||||
// but easier on the compiler side
|
// but easier on the compiler side
|
||||||
@ -398,14 +398,14 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
// dst.noalias() += lhs.lazyProduct(rhs);
|
// dst.noalias() += lhs.lazyProduct(rhs);
|
||||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Dst>
|
template<typename Dst>
|
||||||
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
|
||||||
{
|
{
|
||||||
// dst.noalias() -= lhs.lazyProduct(rhs);
|
// dst.noalias() -= lhs.lazyProduct(rhs);
|
||||||
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
|
||||||
|
Loading…
x
Reference in New Issue
Block a user