syncing this fork with upstream

Deven Desai 2018-06-13 12:09:52 -04:00
commit d1d22ef0f4
88 changed files with 1266 additions and 395 deletions

View File

@@ -157,6 +157,11 @@ template<typename Derived> class DenseBase
       * we are dealing with a column-vector (if there is only one column) or with
       * a row-vector (if there is only one row). */
+    NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2,
+    /**< This value is equal to Tensor::NumDimensions, i.e. 0 for scalars, 1 for vectors,
+      * and 2 for matrices.
+      */
    Flags = internal::traits<Derived>::Flags,
      /**< This stores expression \ref flags flags which may or may not be inherited by new expressions
       * constructed from this one. See the \ref flags "list of flags".
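For context, a minimal sketch that is not part of the diff: what the new NumDimensions enum reports for a few common dense types, assuming a build of this synced tree.

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      // 0 for 1x1 "scalar" types, 1 for vectors, 2 for everything else.
      std::cout << int(Eigen::Matrix<double,1,1>::NumDimensions) << "\n";  // 0
      std::cout << int(Eigen::Vector3d::NumDimensions) << "\n";            // 1
      std::cout << int(Eigen::MatrixXd::NumDimensions) << "\n";            // 2
      return 0;
    }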

View File

@@ -85,6 +85,8 @@ struct default_packet_traits
    HasI0e = 0,
    HasI1e = 0,
    HasIGamma = 0,
+   HasIGammaDerA = 0,
+   HasGammaSampleDerAlpha = 0,
    HasIGammac = 0,
    HasBetaInc = 0,

View File

@@ -1293,17 +1293,17 @@ double exp(const double &x) { return ::exp(x); }
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
std::complex<float> exp(const std::complex<float>& x) {
-  auto com = ::expf(x.real());
-  auto res_real = com * ::cosf(x.imag());
-  auto res_imag = com * ::sinf(x.imag());
+  float com = ::expf(x.real());
+  float res_real = com * ::cosf(x.imag());
+  float res_imag = com * ::sinf(x.imag());
  return std::complex<float>(res_real, res_imag);
}

template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
std::complex<double> exp(const std::complex<double>& x) {
-  auto com = ::exp(x.real());
-  auto res_real = com * ::cos(x.imag());
-  auto res_imag = com * ::sin(x.imag());
+  double com = ::exp(x.real());
+  double res_real = com * ::cos(x.imag());
+  double res_imag = com * ::sin(x.imag());
  return std::complex<double>(res_real, res_imag);
}
#endif

View File

@@ -137,10 +137,14 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
               typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
+<<<<<<< local
#if defined(EIGEN_HIPCC)
  EIGEN_DEVICE_FUNC
#endif
  static EIGEN_STRONG_INLINE
+=======
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+>>>>>>> other
  void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
  {
    Index dstRows = src.rows();
@@ -158,7 +162,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
               typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
-  static EIGEN_STRONG_INLINE
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -173,7 +177,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
               typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
{
  typedef Product<Lhs,Rhs,Options> SrcXprType;
-  static EIGEN_STRONG_INLINE
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -193,7 +197,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
  typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
                        const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
                        const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
-  static EIGEN_STRONG_INLINE
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
  {
    call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
@@ -220,7 +224,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
struct assignment_from_xpr_op_product
{
  template<typename SrcXprType, typename InitialFunc>
-  static EIGEN_STRONG_INLINE
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
  {
    call_assignment_no_alias(dst, src.lhs(), Func1());
@@ -249,19 +253,19 @@ template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
  template<typename Dst>
-  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
  }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
  }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
};
@@ -272,7 +276,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
// Column major result
template<typename Dst, typename Lhs, typename Rhs, typename Func>
-void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
+void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
{
  evaluator<Rhs> rhsEval(rhs);
  typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
@@ -285,7 +289,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const
// Row major result
template<typename Dst, typename Lhs, typename Rhs, typename Func>
-void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
+void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
{
  evaluator<Lhs> lhsEval(lhs);
  typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
@@ -303,37 +307,37 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
-  struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
-  struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
-  struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+  struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
+  struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+  struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
  struct adds {
    Scalar m_scale;
    explicit adds(const Scalar& s) : m_scale(s) {}
-    template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+    template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
      dst.const_cast_derived() += m_scale * src;
    }
  };

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
  }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
  }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
  }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
  }
@@ -348,19 +352,19 @@ struct generic_product_impl_base
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
};
@@ -376,7 +380,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
  typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;

  template<typename Dest>
-  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
  {
    LhsNested actual_lhs(lhs);
    RhsNested actual_rhs(rhs);
@@ -393,10 +397,14 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  template<typename Dst>
+<<<<<<< local
#if defined(EIGEN_HIPCC)
  EIGEN_DEVICE_FUNC
#endif
  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+=======
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+>>>>>>> other
  {
    // Same as: dst.noalias() = lhs.lazyProduct(rhs);
    // but easier on the compiler side
@@ -404,14 +412,14 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
  }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // dst.noalias() += lhs.lazyProduct(rhs);
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
  }

  template<typename Dst>
-  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
  {
    // dst.noalias() -= lhs.lazyProduct(rhs);
    call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
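For context (not from the diff itself): the running theme of these ProductEvaluators.h changes is tagging the dense product paths with EIGEN_DEVICE_FUNC so that small fixed-size products can be evaluated inside HIP/CUDA kernels. A hedged sketch of the kind of device code this is meant to enable, assuming Eigen is compiled with hipcc or nvcc; the kernel name and layout are illustrative only.

    #include <Eigen/Dense>

    // Illustrative device kernel: multiply two 3x3 matrices stored back to back in `in`.
    __global__ void small_product_kernel(const float* in, float* out)
    {
      Eigen::Map<const Eigen::Matrix3f> A(in);
      Eigen::Map<const Eigen::Matrix3f> B(in + 9);
      Eigen::Map<Eigen::Matrix3f>       C(out);
      C = A * B;  // relies on the device-annotated product evaluators above
    }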

View File

@@ -17,7 +17,6 @@ namespace Eigen {
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
-  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
  return derived();
}
@@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(co
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
-  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
  return derived();
}
@@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(co
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
-  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
  return derived();
}
@@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(co
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
-  typedef typename Derived::PlainObject PlainObject;
  internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
  return derived();
}

View File

@@ -181,7 +181,7 @@ struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<t
  }
};

-} // end namepsace internal
+} // end namespace internal

} // end namespace Eigen

View File

@@ -56,7 +56,8 @@ class SolverBase : public EigenBase<Derived>
    MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
                                                           internal::traits<Derived>::MaxColsAtCompileTime>::ret),
    IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
-                         || internal::traits<Derived>::MaxColsAtCompileTime == 1
+                         || internal::traits<Derived>::MaxColsAtCompileTime == 1,
+    NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2
  };

  /** Default constructor */

View File

@@ -318,9 +318,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
}

#ifndef EIGEN_VECTORIZE_AVX512
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif

template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {

View File

@@ -604,9 +604,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
  pstore(to, pa);
}

-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

template <>
EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {

View File

@@ -47,6 +47,8 @@ template<> struct packet_traits<float> : default_packet_traits
    HasI0e = 1,
    HasI1e = 1,
    HasIGamma = 1,
+   HasIGammaDerA = 1,
+   HasGammaSampleDerAlpha = 1,
    HasIGammac = 1,
    HasBetaInc = 1,
@@ -78,6 +80,8 @@ template<> struct packet_traits<double> : default_packet_traits
    HasI0e = 1,
    HasI1e = 1,
    HasIGamma = 1,
+   HasIGammaDerA = 1,
+   HasGammaSampleDerAlpha = 1,
    HasIGammac = 1,
    HasBetaInc = 1,

View File

@@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
                               _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
}

-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
@@ -324,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }

-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{

View File

@@ -461,10 +461,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
  pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
}

+#if EIGEN_COMP_PGI
+typedef const void * SsePrefetchPtrType;
+#else
+typedef const char * SsePrefetchPtrType;
+#endif
+
#ifndef EIGEN_VECTORIZE_AVX
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif

#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
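For context, a note not taken from the diff: _mm_prefetch is declared with a const char* argument by most toolchains, while the PGI compiler's intrinsics header uses const void*, so casting through a per-compiler typedef keeps the call well-formed everywhere. A minimal stand-alone sketch of the same idea, with illustrative names:

    #include <xmmintrin.h>

    // Hypothetical mirror of the SsePrefetchPtrType approach above.
    #if defined(__PGI)
    typedef const void* PrefetchPtr;   // PGI: _mm_prefetch(const void*, int)
    #else
    typedef const char* PrefetchPtr;   // GCC/Clang/MSVC: _mm_prefetch(const char*, int)
    #endif

    inline void prefetch_float(const float* addr) {
      _mm_prefetch((PrefetchPtr)addr, _MM_HINT_T0);
    }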

View File

@@ -91,7 +91,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
-  // parallelizer mechanism has to be redisigned anyway.
+  // parallelizer mechanism has to be redesigned anyway.
  EIGEN_UNUSED_VARIABLE(depth);
  EIGEN_UNUSED_VARIABLE(transpose);
  func(0,rows, 0,cols);

View File

@@ -702,7 +702,7 @@ namespace Eigen {
// If the user explicitly disable vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
-#elif defined(EIGEN_VECTORIZE_AVX512)
+#elif defined(__AVX512F__)
  // 64 bytes static alignment is preferred only if really required
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#elif defined(__AVX__)
@@ -1033,7 +1033,13 @@ namespace Eigen {
# define EIGEN_NOEXCEPT
# define EIGEN_NOEXCEPT_IF(x)
# define EIGEN_NO_THROW throw()
+# if EIGEN_COMP_MSVC
+   // MSVC does not support exception specifications (warning C4290),
+   // and they are deprecated in c++11 anyway.
+#  define EIGEN_EXCEPTION_SPEC(X) throw()
+# else
#  define EIGEN_EXCEPTION_SPEC(X) throw(X)
+# endif
#endif

#endif // EIGEN_MACROS_H
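For orientation (not part of the commit): dynamic exception specifications such as throw(std::bad_alloc) trigger warning C4290 on MSVC and were removed entirely in C++17, so the macro falls back to a plain throw() there. A hedged sketch of the same pattern with made-up names, assuming a pre-C++17 compiler:

    #include <new>      // std::bad_alloc
    #include <cstddef>  // std::size_t

    // Illustrative stand-in for EIGEN_EXCEPTION_SPEC; the macro name is hypothetical.
    #if defined(_MSC_VER)
      #define MY_EXCEPTION_SPEC(X) throw()    // MSVC: dynamic specs unsupported (C4290)
    #else
      #define MY_EXCEPTION_SPEC(X) throw(X)   // other pre-C++17 compilers accept them
    #endif

    void* allocate_bytes(std::size_t n) MY_EXCEPTION_SPEC(std::bad_alloc)
    {
      return ::operator new(n);
    }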

View File

@@ -587,23 +587,27 @@ T div_ceil(const T &a, const T &b)
// The aim of the following functions is to bypass -Wfloat-equal warnings
// when we really want a strict equality comparison on floating points.
-template<typename X, typename Y> EIGEN_STRONG_INLINE
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool equal_strict(const X& x,const Y& y) { return x == y; }

-template<> EIGEN_STRONG_INLINE
+#if !defined(EIGEN_CUDA_ARCH)
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }

-template<> EIGEN_STRONG_INLINE
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
+#endif

-template<typename X, typename Y> EIGEN_STRONG_INLINE
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool not_equal_strict(const X& x,const Y& y) { return x != y; }

-template<> EIGEN_STRONG_INLINE
+#if !defined(EIGEN_CUDA_ARCH)
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }

-template<> EIGEN_STRONG_INLINE
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
+#endif

} // end namespace numext
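As an aside (not from the diff): the helpers route through std::equal_to / std::not_equal_to so that the actual == lands inside a standard-library header, where GCC and Clang do not report -Wfloat-equal at the call site; the new guards keep that host-only trick while the generic EIGEN_DEVICE_FUNC overloads remain usable in device code. A tiny stand-alone illustration:

    #include <functional>

    bool same_direct(float a, float b)  { return a == b; }                        // may warn under -Wfloat-equal
    bool same_wrapped(float a, float b) { return std::equal_to<float>()(a, b); }  // same comparison, no warning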

View File

@@ -284,13 +284,13 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
  using std::abs;

  m_matT = matrixH;
+  m_workspaceVector.resize(m_matT.cols());
  if(computeU)
-    m_matU = matrixQ;
+    matrixQ.evalTo(m_matU, m_workspaceVector);

  Index maxIters = m_maxIters;
  if (maxIters == -1)
    maxIters = m_maxIterationsPerRow * matrixH.rows();
-  m_workspaceVector.resize(m_matT.cols());
  Scalar* workspace = &m_workspaceVector.coeffRef(0);

  // The matrix m_matT is divided in three parts.

View File

@@ -108,7 +108,7 @@ struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, interna
  }
};

-} // end namepsace internal
+} // end namespace internal

} // end namespace Eigen

View File

@@ -5,7 +5,7 @@
/*
-NOTE: thes functions vave been adapted from the LDL library:
+NOTE: these functions have been adapted from the LDL library:

LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved.

View File

@@ -87,6 +87,11 @@ template<typename Derived> class SparseMatrixBase
       * we are dealing with a column-vector (if there is only one column) or with
       * a row-vector (if there is only one row). */
+    NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2,
+    /**< This value is equal to Tensor::NumDimensions, i.e. 0 for scalars, 1 for vectors,
+      * and 2 for matrices.
+      */
    Flags = internal::traits<Derived>::Flags,
      /**< This stores expression \ref flags flags which may or may not be inherited by new expressions
       * constructed from this one. See the \ref flags "list of flags".

View File

@@ -10,7 +10,7 @@ int main()
  MatrixXi m(size,size+1);          // a (size)x(size+1)-matrix of int's
  for (int j=0; j<m.cols(); ++j)    // loop over columns
    for (int i=0; i<m.rows(); ++i)  // loop over rows
-     m(i,j) = i+j*m.rows();        // to access matrix coefficients,
+     m(i,j) = i+j*size;            // to access matrix coefficients,
                                    // use operator()(int,int)
  std::cout << m << "\n\n";
}

View File

@@ -67,6 +67,7 @@ namespace internal {
    // This method should implement "dst += alpha * lhs * rhs" inplace,
    // however, for iterative solvers, alpha is always equal to 1, so let's not bother about it.
    assert(alpha==Scalar(1) && "scaling is not implemented");
+   EIGEN_ONLY_USED_FOR_DEBUG(alpha);

    // Here we could simply call dst.noalias() += lhs.my_matrix() * rhs,
    // but let's do something fancier (and less efficient):
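Side note, not from the diff: once NDEBUG strips the assert, alpha would be an unused parameter, and EIGEN_ONLY_USED_FOR_DEBUG exists to silence that warning (to my understanding it expands to a void-cast style "use" in release builds and to nothing in debug builds). A small sketch of the idiom with an illustrative macro name:

    #include <cassert>

    #define ONLY_USED_FOR_DEBUG(x) (void)(x)   // hypothetical stand-in

    void scale_and_add(double alpha)
    {
      // The parameter only feeds the assert, so mark it as "used" for release builds.
      assert(alpha == 1.0 && "scaling is not implemented");
      ONLY_USED_FOR_DEBUG(alpha);
      // ... alpha == 1 fast path ...
    }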

View File

@@ -1,5 +1,6 @@
#include <Eigen/Sparse>
#include <vector>
+#include <iostream>

typedef Eigen::SparseMatrix<double> SpMat; // declares a column-major sparse matrix type of double
typedef Eigen::Triplet<double> T;
@@ -9,7 +10,10 @@ void saveAsBitmap(const Eigen::VectorXd& x, int n, const char* filename);
int main(int argc, char** argv)
{
-  assert(argc==2);
+  if(argc!=2) {
+    std::cerr << "Error: expected one and only one argument.\n";
+    return -1;
+  }

  int n = 300;  // size of the image
  int m = n*n;  // number of unknowns (=number of pixels)

View File

@@ -47,7 +47,7 @@ set(EIGEN_TEST_MATRIX_DIR "" CACHE STRING "Enable testing of realword sparse mat
if(EIGEN_TEST_MATRIX_DIR)
  if(NOT WIN32)
    message(STATUS "Test realworld sparse matrices: ${EIGEN_TEST_MATRIX_DIR}")
-   add_definitions( -DTEST_REAL_CASES=${EIGEN_TEST_MATRIX_DIR} )
+   add_definitions( -DTEST_REAL_CASES="${EIGEN_TEST_MATRIX_DIR}" )
  else(NOT WIN32)
    message(STATUS "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32")
  endif(NOT WIN32)
@@ -292,6 +292,7 @@ ei_add_test(mpl2only)
ei_add_test(inplace_decomposition)
ei_add_test(half_float)
ei_add_test(array_of_string)
+ei_add_test(num_dimensions)

add_executable(bug1213 bug1213.cpp bug1213_main.cpp)

View File

@@ -140,7 +140,7 @@ void check_indexed_view()
                 "500 501 502 503 504 505 506 507 508 509")
  );

-  // takes the row numer 3, and repeat it 5 times
+  // take row number 3, and repeat it 5 times
  VERIFY( MATCH( A(seqN(3,5,0), all),
                 "300 301 302 303 304 305 306 307 308 309\n"
                 "300 301 302 303 304 305 306 307 308 309\n"
@@ -397,10 +397,6 @@ void test_indexed_view()
//   }

  // static checks of some internals:
-  #define STATIC_CHECK( COND ) \
-    EIGEN_STATIC_ASSERT( (COND) , EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT )
  STATIC_CHECK(( internal::is_valid_index_type<int>::value ));
  STATIC_CHECK(( internal::is_valid_index_type<unsigned int>::value ));
  STATIC_CHECK(( internal::is_valid_index_type<short>::value ));

View File

@@ -345,6 +345,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file,
#define VERIFY_IS_UNITARY(a) VERIFY(test_isUnitary(a))

+#define STATIC_CHECK(COND) EIGEN_STATIC_ASSERT( (COND) , EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT )
+
#define CALL_SUBTEST(FUNC) do { \
    g_test_stack.push_back(EI_PP_MAKE_STRING(FUNC)); \
    FUNC; \
@@ -365,7 +367,7 @@ template<> inline long double test_precision<std::complex<long double> >() { ret
inline bool test_isApprox(const short& a, const short& b)
{ return internal::isApprox(a, b, test_precision<short>()); }
inline bool test_isApprox(const unsigned short& a, const unsigned short& b)
-{ return internal::isApprox(a, b, test_precision<unsigned long>()); }
+{ return internal::isApprox(a, b, test_precision<unsigned short>()); }
inline bool test_isApprox(const unsigned int& a, const unsigned int& b)
{ return internal::isApprox(a, b, test_precision<unsigned int>()); }
inline bool test_isApprox(const long& a, const long& b)

View File

@@ -205,7 +205,6 @@ void test_mapped_matrix()
    CALL_SUBTEST_8( map_static_methods(RowVector3d()) );
    CALL_SUBTEST_9( map_static_methods(VectorXcd(8)) );
    CALL_SUBTEST_10( map_static_methods(VectorXf(12)) );
    CALL_SUBTEST_11( map_not_aligned_on_scalar<double>() );
  }
}

test/num_dimensions.cpp (new file, 90 lines)
View File

@@ -0,0 +1,90 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2018 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/SparseCore>
template<int ExpectedDim,typename Xpr>
void check_dim(const Xpr& ) {
STATIC_CHECK( Xpr::NumDimensions == ExpectedDim );
}
#if EIGEN_HAS_CXX11
template<template <typename,int,int> class Object>
void map_num_dimensions()
{
typedef Object<double, 1, 1> ArrayScalarType;
typedef Object<double, 2, 1> ArrayVectorType;
typedef Object<double, 1, 2> TransposeArrayVectorType;
typedef Object<double, 2, 2> ArrayType;
typedef Object<double, Eigen::Dynamic, 1> DynamicArrayVectorType;
typedef Object<double, 1, Eigen::Dynamic> DynamicTransposeArrayVectorType;
typedef Object<double, Eigen::Dynamic, Eigen::Dynamic> DynamicArrayType;
STATIC_CHECK(ArrayScalarType::NumDimensions == 0);
STATIC_CHECK(ArrayVectorType::NumDimensions == 1);
STATIC_CHECK(TransposeArrayVectorType::NumDimensions == 1);
STATIC_CHECK(ArrayType::NumDimensions == 2);
STATIC_CHECK(DynamicArrayVectorType::NumDimensions == 1);
STATIC_CHECK(DynamicTransposeArrayVectorType::NumDimensions == 1);
STATIC_CHECK(DynamicArrayType::NumDimensions == 2);
typedef Eigen::Map<ArrayScalarType> ArrayScalarMap;
typedef Eigen::Map<ArrayVectorType> ArrayVectorMap;
typedef Eigen::Map<TransposeArrayVectorType> TransposeArrayVectorMap;
typedef Eigen::Map<ArrayType> ArrayMap;
typedef Eigen::Map<DynamicArrayVectorType> DynamicArrayVectorMap;
typedef Eigen::Map<DynamicTransposeArrayVectorType> DynamicTransposeArrayVectorMap;
typedef Eigen::Map<DynamicArrayType> DynamicArrayMap;
STATIC_CHECK(ArrayScalarMap::NumDimensions == 0);
STATIC_CHECK(ArrayVectorMap::NumDimensions == 1);
STATIC_CHECK(TransposeArrayVectorMap::NumDimensions == 1);
STATIC_CHECK(ArrayMap::NumDimensions == 2);
STATIC_CHECK(DynamicArrayVectorMap::NumDimensions == 1);
STATIC_CHECK(DynamicTransposeArrayVectorMap::NumDimensions == 1);
STATIC_CHECK(DynamicArrayMap::NumDimensions == 2);
}
template<typename Scalar, int Rows, int Cols>
using TArray = Array<Scalar,Rows,Cols>;
template<typename Scalar, int Rows, int Cols>
using TMatrix = Matrix<Scalar,Rows,Cols>;
#endif
void test_num_dimensions()
{
int n = 10;
ArrayXXd A(n,n);
CALL_SUBTEST( check_dim<2>(A) );
CALL_SUBTEST( check_dim<2>(A.block(1,1,2,2)) );
CALL_SUBTEST( check_dim<1>(A.col(1)) );
CALL_SUBTEST( check_dim<1>(A.row(1)) );
MatrixXd M(n,n);
CALL_SUBTEST( check_dim<0>(M.row(1)*M.col(1)) );
SparseMatrix<double> S(n,n);
CALL_SUBTEST( check_dim<2>(S) );
CALL_SUBTEST( check_dim<2>(S.block(1,1,2,2)) );
CALL_SUBTEST( check_dim<1>(S.col(1)) );
CALL_SUBTEST( check_dim<1>(S.row(1)) );
SparseVector<double> s(n);
CALL_SUBTEST( check_dim<1>(s) );
CALL_SUBTEST( check_dim<1>(s.head(2)) );
#if EIGEN_HAS_CXX11
CALL_SUBTEST( map_num_dimensions<TArray>() );
CALL_SUBTEST( map_num_dimensions<TMatrix>() );
#endif
}

View File

@@ -581,7 +581,7 @@ is not initialized.
Creates a tensor mapping an existing array of data. The data must not be freed
until the TensorMap is discarded, and the size of the data must be large enough
-to accomodate of the coefficients of the tensor.
+to accommodate the coefficients of the tensor.

    float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
    Eigen::TensorMap<Tensor<float, 2>> a(data, 3, 4);

View File

@@ -48,7 +48,7 @@ namespace Eigen {
  *
  * <dl>
  * <dt><b>Relation to other parts of Eigen:</b></dt>
-  * <dd>The midterm developement goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that
+  * <dd>The midterm development goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that
  * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code
  * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor
  * class does not provide any of these features and is only available as a stand-alone class that just allows for

View File

@@ -20,7 +20,7 @@ namespace Eigen {
  * \brief The tensor base class.
  *
  * This class is the common parent of the Tensor and TensorMap class, thus
-  * making it possible to use either class interchangably in expressions.
+  * making it possible to use either class interchangeably in expressions.
  */

template<typename Derived>
@@ -152,6 +152,20 @@ class TensorBase<Derived, ReadOnlyAccessors>
      return binaryExpr(other.derived(), internal::scalar_igamma_op<Scalar>());
    }

+   // igamma_der_a(a = this, x = other)
+   template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+   const TensorCwiseBinaryOp<internal::scalar_igamma_der_a_op<Scalar>, const Derived, const OtherDerived>
+   igamma_der_a(const OtherDerived& other) const {
+     return binaryExpr(other.derived(), internal::scalar_igamma_der_a_op<Scalar>());
+   }
+
+   // gamma_sample_der_alpha(alpha = this, sample = other)
+   template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+   const TensorCwiseBinaryOp<internal::scalar_gamma_sample_der_alpha_op<Scalar>, const Derived, const OtherDerived>
+   gamma_sample_der_alpha(const OtherDerived& other) const {
+     return binaryExpr(other.derived(), internal::scalar_gamma_sample_der_alpha_op<Scalar>());
+   }
+
    // igammac(a = this, x = other)
    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    const TensorCwiseBinaryOp<internal::scalar_igammac_op<Scalar>, const Derived, const OtherDerived>
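For orientation (not part of the diff): with the two new entry points, the derivative of the lower incomplete gamma function with respect to its shape parameter, and the corresponding gamma-sample derivative, become ordinary element-wise tensor expressions. A hedged usage sketch, assuming this tree's unsupported Tensor module:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main()
    {
      Eigen::Tensor<float, 2> a(2, 3), x(2, 3);
      a.setConstant(2.0f);   // shape parameters
      x.setConstant(1.5f);   // evaluation points / samples

      Eigen::Tensor<float, 2> da = a.igamma_der_a(x);            // d/da igamma(a, x)
      Eigen::Tensor<float, 2> ds = a.gamma_sample_der_alpha(x);   // d(sample)/d(alpha)
      return 0;
    }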

View File

@@ -105,6 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  bool nByOne = false, oneByN = false;

  enum {
    IsAligned = true,
@@ -142,6 +143,24 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
      }
    }
+
+   if (input_dims[0] == 1) {
+     oneByN = true;
+     for (int i = 1; i < NumDims; ++i) {
+       if (broadcast[i] != 1) {
+         oneByN = false;
+         break;
+       }
+     }
+   } else if (input_dims[NumDims-1] == 1) {
+     nByOne = true;
+     for (int i = 0; i < NumDims-1; ++i) {
+       if (broadcast[i] != 1) {
+         nByOne = false;
+         break;
+       }
+     }
+   }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -237,11 +256,86 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+     if (oneByN) {
+       return packetNByOne<LoadMode>(index);
+     } else if (nByOne) {
+       return packetOneByN<LoadMode>(index);
+     } else {
        return packetColMajor<LoadMode>(index);
+     }
+   } else {
+     if (oneByN) {
+       return packetOneByN<LoadMode>(index);
+     } else if (nByOne) {
+       return packetNByOne<LoadMode>(index);
      } else {
        return packetRowMajor<LoadMode>(index);
      }
    }
+  }
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
Index dim, inputIndex;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
dim = NumDims - 1;
} else {
dim = 0;
}
inputIndex = index % m_inputStrides[dim];
if (inputIndex + PacketSize <= m_inputStrides[dim]) {
return m_impl.template packet<Unaligned>(inputIndex);
} else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
for (int i = 0; i < PacketSize; ++i) {
if (inputIndex > m_inputStrides[dim]-1) {
inputIndex = 0;
}
values[i] = m_impl.coeff(inputIndex++);
}
return internal::pload<PacketReturnType>(values);
}
}
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
Index dim, inputIndex, outputOffset;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
dim = 1;
} else {
dim = NumDims - 2;
}
inputIndex = index / m_outputStrides[dim];
outputOffset = index % m_outputStrides[dim];
if (outputOffset + PacketSize <= m_outputStrides[dim]) {
values[0] = m_impl.coeff(inputIndex);
return internal::pload1<PacketReturnType>(values);
} else {
for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
if (outputOffset + cur < m_outputStrides[dim]) {
values[i] = m_impl.coeff(inputIndex);
} else {
values[i] = m_impl.coeff(++inputIndex);
outputOffset = 0;
cur = 0;
}
}
return internal::pload<PacketReturnType>(values);
}
}
// Ignore the LoadMode and always use unaligned loads since we can't guarantee // Ignore the LoadMode and always use unaligned loads since we can't guarantee
// the alignment at compile time. // the alignment at compile time.
@@ -290,8 +384,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      for (int i = 1; i < PacketSize; ++i) {
+       if (innermostLoc + i < m_impl.dimensions()[0]) {
+         values[i] = m_impl.coeff(inputIndex+i);
+       } else {
          values[i] = coeffColMajor(originalIndex+i);
        }
+     }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
@@ -342,8 +440,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      for (int i = 1; i < PacketSize; ++i) {
+       if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) {
+         values[i] = m_impl.coeff(inputIndex+i);
+       } else {
          values[i] = coeffRowMajor(originalIndex+i);
        }
+     }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
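For context (not from the diff): the new oneByN / nByOne flags detect broadcasts of a single row or a single column, which are common enough to deserve the dedicated packet paths added above. A hedged usage sketch of the case being optimized, assuming this tree's Tensor module:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main()
    {
      // Broadcast a 1 x 8 tensor into 4 x 8 by replicating its only row.
      Eigen::Tensor<float, 2> row(1, 8);
      row.setRandom();
      Eigen::array<Eigen::Index, 2> bcast = {{4, 1}};
      Eigen::Tensor<float, 2> tiled = row.broadcast(bcast);
      return (tiled.dimension(0) == 4 && tiled.dimension(1) == 8) ? 0 : 1;
    }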

View File

@@ -78,7 +78,7 @@ class TensorXsmmContractionBlocking {
      outer_n_ = outer_n_ != 0 ? outer_n_ : n;
    }
#else
-   // Defaults, possibly overriden per-platform.
+   // Defaults, possibly overridden per-platform.
    copyA_ = true;
    copyB_ = false;

View File

@@ -350,7 +350,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
      // Normal number of notifications for k slice switch is
      // nm_ + nn_ + nm_ * nn_. However, first P - 1 slices will receive only
      // nm_ + nn_ notifications, because they will not receive notifications
-     // from preceeding kernels.
+     // from preceding kernels.
      state_switch_[x] =
          x == 0
              ? 1
@@ -530,7 +530,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
    void kernel(Index m, Index n, Index k) {
      // Note: order of iteration matters here. Iteration over m is innermost
-     // because we want to reuse the same packed rhs in consequetive tasks
+     // because we want to reuse the same packed rhs in consecutive tasks
      // (rhs fits into L2$ while lhs only into L3$).
      const Index nend = n * gn_ + gn(n);
      const Index mend = m * gm_ + gm(m);

View File

@@ -195,7 +195,7 @@ class TensorCostModel {
    // 11 is L2 cache latency on Haswell.
    // We don't know whether data is in L1, L2 or L3. But we are most interested
    // in single-threaded computational time around 100us-10ms (smaller time
-   // is too small for parallelization, larger time is not intersting
+   // is too small for parallelization, larger time is not interesting
    // either because we are probably using all available threads already).
    // And for the target time range, L2 seems to be what matters. Data set
    // fitting into L1 is too small to take noticeable time. Data set fitting

View File

@ -286,7 +286,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) {
tileSize =static_cast<Index>(m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()); tileSize =static_cast<Index>(m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>());
auto s= m_queue.get_device().template get_info<cl::sycl::info::device::vendor>(); auto s= m_queue.get_device().template get_info<cl::sycl::info::device::vendor>();
std::transform(s.begin(), s.end(), s.begin(), ::tolower); std::transform(s.begin(), s.end(), s.begin(), ::tolower);
if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size
tileSize=std::min(static_cast<Index>(256), static_cast<Index>(tileSize)); tileSize=std::min(static_cast<Index>(256), static_cast<Index>(tileSize));
} }
rng = n; rng = n;
@ -303,7 +303,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) {
template<typename Index> template<typename Index>
EIGEN_STRONG_INLINE void parallel_for_setup(Index dim0, Index dim1, Index &tileSize0, Index &tileSize1, Index &rng0, Index &rng1, Index &GRange0, Index &GRange1) const { EIGEN_STRONG_INLINE void parallel_for_setup(Index dim0, Index dim1, Index &tileSize0, Index &tileSize1, Index &rng0, Index &rng1, Index &GRange0, Index &GRange1) const {
Index max_workgroup_Size = static_cast<Index>(maxSyclThreadsPerBlock()); Index max_workgroup_Size = static_cast<Index>(maxSyclThreadsPerBlock());
if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size
max_workgroup_Size=std::min(static_cast<Index>(256), static_cast<Index>(max_workgroup_Size)); max_workgroup_Size=std::min(static_cast<Index>(256), static_cast<Index>(max_workgroup_Size));
} }
Index pow_of_2 = static_cast<Index>(std::log2(max_workgroup_Size)); Index pow_of_2 = static_cast<Index>(std::log2(max_workgroup_Size));
@ -331,7 +331,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) {
template<typename Index> template<typename Index>
EIGEN_STRONG_INLINE void parallel_for_setup(Index dim0, Index dim1,Index dim2, Index &tileSize0, Index &tileSize1, Index &tileSize2, Index &rng0, Index &rng1, Index &rng2, Index &GRange0, Index &GRange1, Index &GRange2) const { EIGEN_STRONG_INLINE void parallel_for_setup(Index dim0, Index dim1,Index dim2, Index &tileSize0, Index &tileSize1, Index &tileSize2, Index &rng0, Index &rng1, Index &rng2, Index &GRange0, Index &GRange1, Index &GRange2) const {
Index max_workgroup_Size = static_cast<Index>(maxSyclThreadsPerBlock()); Index max_workgroup_Size = static_cast<Index>(maxSyclThreadsPerBlock());
if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size
max_workgroup_Size=std::min(static_cast<Index>(256), static_cast<Index>(max_workgroup_Size)); max_workgroup_Size=std::min(static_cast<Index>(256), static_cast<Index>(max_workgroup_Size));
} }
Index pow_of_2 = static_cast<Index>(std::log2(max_workgroup_Size)); Index pow_of_2 = static_cast<Index>(std::log2(max_workgroup_Size));
@ -377,7 +377,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) {
EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; }
EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const { EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
// OpenCL doesnot have such concept // OpenCL doesn't have such concept
return 2; return 2;
} }
@ -519,7 +519,7 @@ struct SyclDevice {
return m_queue_stream->maxSyclThreadsPerBlock(); return m_queue_stream->maxSyclThreadsPerBlock();
} }
EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const { EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
// OpenCL doesnot have such concept // OpenCL doesn't have such concept
return m_queue_stream->maxSyclThreadsPerMultiProcessor(); return m_queue_stream->maxSyclThreadsPerMultiProcessor();
// return stream_->deviceProperties().maxThreadsPerMultiProcessor; // return stream_->deviceProperties().maxThreadsPerMultiProcessor;
} }
@ -544,7 +544,7 @@ struct SyclDevice {
}; };
// This is used as a distinguishable device inside the kernel as the sycl device class is not Standard layout. // This is used as a distinguishable device inside the kernel as the sycl device class is not Standard layout.
// This is internal and must not be used by the user. This dummy device allows us to specialise the tensor evaluator // This is internal and must not be used by the user. This dummy device allows us to specialise the tensor evaluator
// inside the kenrel. So we can have two types of eval for host and device. This is required for TensorArgMax operation // inside the kernel. So we can have two types of eval for host and device. This is required for TensorArgMax operation
struct SyclKernelDevice:DefaultDevice{}; struct SyclKernelDevice:DefaultDevice{};
} // end namespace Eigen } // end namespace Eigen

View File

@ -274,7 +274,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
} }
} }
// processs the line // process the line
if (is_power_of_two) { if (is_power_of_two) {
processDataLineCooleyTukey(line_buf, line_len, log_len); processDataLineCooleyTukey(line_buf, line_len, log_len);
} }

View File

@ -12,7 +12,7 @@
namespace Eigen { namespace Eigen {
// MakePointer class is used as a container of the adress space of the pointer // MakePointer class is used as a container of the address space of the pointer
// on the host and on the device. From the host side it generates the T* pointer // on the host and on the device. From the host side it generates the T* pointer
// and when EIGEN_USE_SYCL is used it constructs a buffer with a map_allocator to // and when EIGEN_USE_SYCL is used it constructs a buffer with a map_allocator to
// T* m_data on the host. It is always called on the device. // T* m_data on the host. It is always called on the device.
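For orientation, the non-SYCL default that this comment describes is essentially a trivial pointer-type wrapper along these lines (a sketch from memory, not the hunk shown above; check TensorForwardDeclarations.h for the exact definition):

  template <typename T>
  struct MakePointer {
    typedef T* Type;   // on the host, and whenever EIGEN_USE_SYCL is not defined, this is just a raw pointer
  };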

View File

@ -272,8 +272,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
break; break;
default: default:
eigen_assert(false && "unexpected padding"); eigen_assert(false && "unexpected padding");
m_outputCols=0; // silence the uninitialised warnig; m_outputCols=0; // silence the uninitialised warning;
m_outputRows=0; //// silence the uninitialised warnig; m_outputRows=0; //// silence the uninitialised warning;
} }
} }
eigen_assert(m_outputRows > 0); eigen_assert(m_outputRows > 0);

View File

@ -167,7 +167,7 @@ struct TensorIntDivisor {
shift2 = log_div > 1 ? log_div-1 : 0; shift2 = log_div > 1 ? log_div-1 : 0;
} }
// Must have 0 <= numerator. On platforms that dont support the __uint128_t // Must have 0 <= numerator. On platforms that don't support the __uint128_t
// type numerator should also be less than 2^32-1. // type numerator should also be less than 2^32-1.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const {
eigen_assert(static_cast<typename UnsignedTraits<T>::type>(numerator) < NumTraits<UnsignedType>::highest()/2); eigen_assert(static_cast<typename UnsignedTraits<T>::type>(numerator) < NumTraits<UnsignedType>::highest()/2);
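A minimal usage sketch of this divisor class (it lives in the internal namespace, so this is for illustration only; the constructor taking the divisor and the divide() member shown above are assumed to be the whole API needed here):

  #include <unsupported/Eigen/CXX11/Tensor>
  #include <iostream>

  int main() {
    // 100 / 7 computed via multiply-and-shift instead of a hardware divide
    Eigen::internal::TensorIntDivisor<int> div7(7);
    std::cout << div7.divide(100) << "\n";   // prints 14, provided 0 <= numerator is in range
  }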

View File

@ -106,7 +106,7 @@ struct FullReducer<Self, Op, const Eigen::SyclDevice, Vectorizable> {
/// if the shared memory is less than the GRange, we set shared_mem size to the TotalSize and in this case one kernel would be created for recursion to reduce all to one. /// if the shared memory is less than the GRange, we set shared_mem size to the TotalSize and in this case one kernel would be created for recursion to reduce all to one.
if (GRange < outTileSize) outTileSize=GRange; if (GRange < outTileSize) outTileSize=GRange;
/// creating the shared memory for calculating reduction. /// creating the shared memory for calculating reduction.
/// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can /// This one is used to collect all the reduced value of shared memory as we don't have global barrier on GPU. Once it is saved we can
/// recursively apply reduction on it in order to reduce the whole. /// recursively apply reduction on it in order to reduce the whole.
auto temp_global_buffer =cl::sycl::buffer<CoeffReturnType, 1>(cl::sycl::range<1>(GRange)); auto temp_global_buffer =cl::sycl::buffer<CoeffReturnType, 1>(cl::sycl::range<1>(GRange));
typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims; typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims;
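The scheme these comments describe is the usual two-stage GPU reduction; in outline (an illustrative summary, not the actual SYCL kernels in this file):

  stage 1: each work-group reduces its tile in shared (local) memory and writes
           one partial result per group into temp_global_buffer (size GRange);
  stage 2: since there is no global barrier across work-groups, a second pass
           (a recursive call of the same reducer) collapses temp_global_buffer
           down to the single final value.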
@ -150,7 +150,7 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> {
// getting final out buffer at the moment the created buffer is true because there is no need for assign // getting final out buffer at the moment the created buffer is true because there is no need for assign
/// creating the shared memory for calculating reduction. /// creating the shared memory for calculating reduction.
/// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can /// This one is used to collect all the reduced value of shared memory as we don't have global barrier on GPU. Once it is saved we can
/// recursively apply reduction on it in order to reduce the whole. /// recursively apply reduction on it in order to reduce the whole.
dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange); dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange);
dev.sycl_queue().submit([&](cl::sycl::handler &cgh) { dev.sycl_queue().submit([&](cl::sycl::handler &cgh) {

View File

@ -31,7 +31,7 @@ class TensorLazyBaseEvaluator {
int refCount() const { return m_refcount; } int refCount() const { return m_refcount; }
private: private:
// No copy, no assigment; // No copy, no assignment;
TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other); TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other);
TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other); TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other);

View File

@ -117,7 +117,7 @@ SYCLEXTRFUNCTERNARY()
//TensorCustomOp must be specialised otherewise it will be captured by UnaryCategory while its action is different //TensorCustomOp must be specialised otherwise it will be captured by UnaryCategory while its action is different
//from the UnaryCategory and it is similar to the general FunctorExtractor. //from the UnaryCategory and it is similar to the general FunctorExtractor.
/// specialisation of TensorCustomOp /// specialisation of TensorCustomOp
#define SYCLEXTRFUNCCUSTOMUNARYOP(CVQual)\ #define SYCLEXTRFUNCCUSTOMUNARYOP(CVQual)\

View File

@ -80,7 +80,7 @@ template < typename HostExpr, typename FunctorExpr, typename Tuple_of_Acc, typen
typedef typename ConvertToDeviceExpression<const HostExpr>::Type DevExpr; typedef typename ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
auto device_expr = createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); auto device_expr = createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
/// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
/// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the
/// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor); const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor);
/// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
@ -121,7 +121,7 @@ class ReductionFunctor<HostExpr, FunctorExpr, Tuple_of_Acc, Dims, Eigen::interna
typedef typename ConvertToDeviceExpression<const HostExpr>::Type DevExpr; typedef typename ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
auto device_expr = createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); auto device_expr = createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
/// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
/// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the
/// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor); const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor);
/// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
@ -168,7 +168,7 @@ public:
typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr; typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
/// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
/// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the
/// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, op); const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, op);
/// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
@ -215,7 +215,7 @@ public:
typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr; typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors); auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
/// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
/// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the
/// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here. /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, op); const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, op);
/// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is

View File

@ -143,7 +143,7 @@ struct IndexList {};
/// \brief Collects internal details for generating index ranges [MIN, MAX) /// \brief Collects internal details for generating index ranges [MIN, MAX)
/// Declare primary template for index range builder /// Declare primary template for index range builder
/// \tparam MIN is the starting index in the tuple /// \tparam MIN is the starting index in the tuple
/// \tparam N represents sizeof..(elemens)- sizeof...(Is) /// \tparam N represents sizeof..(elements)- sizeof...(Is)
/// \tparam Is... are the list of generated index so far /// \tparam Is... are the list of generated index so far
template <size_t MIN, size_t N, size_t... Is> template <size_t MIN, size_t N, size_t... Is>
struct RangeBuilder; struct RangeBuilder;
@ -161,7 +161,7 @@ struct RangeBuilder<MIN, MIN, Is...> {
/// in this case we are recursively subtracting N by one and adding one /// in this case we are recursively subtracting N by one and adding one
/// index to Is... list until MIN==N /// index to Is... list until MIN==N
/// \tparam MIN is the starting index in the tuple /// \tparam MIN is the starting index in the tuple
/// \tparam N represents sizeof..(elemens)- sizeof...(Is) /// \tparam N represents sizeof..(elements)- sizeof...(Is)
/// \tparam Is... are the list of generated index so far /// \tparam Is... are the list of generated index so far
template <size_t MIN, size_t N, size_t... Is> template <size_t MIN, size_t N, size_t... Is>
struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {}; struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {};
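A concrete expansion trace of the recursion just described (values illustrative):

  RangeBuilder<2, 5>
    : RangeBuilder<2, 4, 4>
    : RangeBuilder<2, 3, 3, 4>
    : RangeBuilder<2, 2, 2, 3, 4>   // MIN == N, the specialization above stops here
  => Is... = 2, 3, 4                // i.e. the half-open index range [2, 5)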

View File

@ -568,7 +568,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
Dimensions m_dimensions; Dimensions m_dimensions;
// Parameters passed to the costructor. // Parameters passed to the constructor.
Index m_plane_strides; Index m_plane_strides;
Index m_row_strides; Index m_row_strides;
Index m_col_strides; Index m_col_strides;

View File

@ -241,7 +241,7 @@ struct dimino_first_step_elements
* multiplying all elements in the given subgroup with the new * multiplying all elements in the given subgroup with the new
* coset representative. Note that the first element of the * coset representative. Note that the first element of the
* subgroup is always the identity element, so the first element of * subgroup is always the identity element, so the first element of
* ther result of this template is going to be the coset * the result of this template is going to be the coset
* representative itself. * representative itself.
* *
* Note that this template accepts an additional boolean parameter * Note that this template accepts an additional boolean parameter

View File

@ -33,10 +33,10 @@ namespace Eigen {
// ec.Notify(true); // ec.Notify(true);
// //
// Notify is cheap if there are no waiting threads. Prewait/CommitWait are not // Notify is cheap if there are no waiting threads. Prewait/CommitWait are not
// cheap, but they are executed only if the preceeding predicate check has // cheap, but they are executed only if the preceding predicate check has
// failed. // failed.
// //
// Algorihtm outline: // Algorithm outline:
// There are two main variables: predicate (managed by user) and state_. // There are two main variables: predicate (managed by user) and state_.
// Operation closely resembles Dekker's mutual exclusion algorithm: // Operation closely resembles Dekker's mutual exclusion algorithm:
// https://en.wikipedia.org/wiki/Dekker%27s_algorithm // https://en.wikipedia.org/wiki/Dekker%27s_algorithm
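For reference, the waiting-side half of the pattern this comment refers to looks like the following (written in the same comment style as the header's own example; the Waiter bookkeeping is simplified and the signatures should be checked against this file):

  //   if (predicate)
  //     return act();
  //   EventCount::Waiter& w = waiters[my_index];
  //   ec.Prewait(&w);                        // publish the intent to block
  //   if (predicate)
  //     return ec.CancelWait(&w), act();     // predicate flipped, no need to block
  //   ec.CommitWait(&w);                     // block until a matching Notify()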
@ -79,7 +79,7 @@ class EventCount {
uint64_t state = state_.load(std::memory_order_seq_cst); uint64_t state = state_.load(std::memory_order_seq_cst);
for (;;) { for (;;) {
if (int64_t((state & kEpochMask) - epoch) < 0) { if (int64_t((state & kEpochMask) - epoch) < 0) {
// The preceeding waiter has not decided on its fate. Wait until it // The preceding waiter has not decided on its fate. Wait until it
// calls either CancelWait or CommitWait, or is notified. // calls either CancelWait or CommitWait, or is notified.
EIGEN_THREAD_YIELD(); EIGEN_THREAD_YIELD();
state = state_.load(std::memory_order_seq_cst); state = state_.load(std::memory_order_seq_cst);
@ -110,7 +110,7 @@ class EventCount {
uint64_t state = state_.load(std::memory_order_relaxed); uint64_t state = state_.load(std::memory_order_relaxed);
for (;;) { for (;;) {
if (int64_t((state & kEpochMask) - epoch) < 0) { if (int64_t((state & kEpochMask) - epoch) < 0) {
// The preceeding waiter has not decided on its fate. Wait until it // The preceding waiter has not decided on its fate. Wait until it
// calls either CancelWait or CommitWait, or is notified. // calls either CancelWait or CommitWait, or is notified.
EIGEN_THREAD_YIELD(); EIGEN_THREAD_YIELD();
state = state_.load(std::memory_order_relaxed); state = state_.load(std::memory_order_relaxed);

View File

@ -198,7 +198,7 @@ class RunQueue {
}; };
std::mutex mutex_; std::mutex mutex_;
// Low log(kSize) + 1 bits in front_ and back_ contain rolling index of // Low log(kSize) + 1 bits in front_ and back_ contain rolling index of
// front/back, repsectively. The remaining bits contain modification counters // front/back, respectively. The remaining bits contain modification counters
// that are incremented on Push operations. This allows us to (1) distinguish // that are incremented on Push operations. This allows us to (1) distinguish
// between empty and full conditions (if we would use log(kSize) bits for // between empty and full conditions (if we would use log(kSize) bits for
// position, these conditions would be indistinguishable); (2) obtain // position, these conditions would be indistinguishable); (2) obtain
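A small, self-contained illustration of the bit layout this comment describes (kSize and the mask names are assumptions chosen to mirror this file; kSize must be a power of two):

  #include <cstdio>

  int main() {
    const unsigned kSize  = 1024;              // example capacity, power of two
    const unsigned kMask  = kSize - 1;         // slot inside the circular array
    const unsigned kMask2 = (kSize << 1) - 1;  // rolling index: log2(kSize) + 1 low bits
    unsigned front = 0x1F7FFu;                 // an arbitrary example value of front_
    std::printf("slot=%u rolling=%u counter_bits=0x%X\n",
                front & kMask,                 // where the element lives
                front & kMask2,                // compared against back_ to tell empty from full
                front & ~kMask2);              // modification counter bumped by Push (ABA guard)
  }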

View File

@ -219,7 +219,7 @@ template<class T, std::size_t N> struct array_size<const array<T,N>& > {
#else #else
// The compiler supports c++11, and we're not targetting cuda: use std::array as Eigen::array // The compiler supports c++11, and we're not targeting cuda: use std::array as Eigen::array
#include <array> #include <array>
namespace Eigen { namespace Eigen {

View File

@ -35,7 +35,7 @@
* a zero for the system (Powell hybrid "dogleg" method). * a zero for the system (Powell hybrid "dogleg" method).
* *
* This code is a port of minpack (http://en.wikipedia.org/wiki/MINPACK). * This code is a port of minpack (http://en.wikipedia.org/wiki/MINPACK).
* Minpack is a very famous, old, robust and well-reknown package, written in * Minpack is a very famous, old, robust and well renowned package, written in
* fortran. Those implementations have been carefully tuned, tested, and used * fortran. Those implementations have been carefully tuned, tested, and used
* for several decades. * for several decades.
* *
@ -63,7 +63,7 @@
* Other tests were added by myself at the very beginning of the * Other tests were added by myself at the very beginning of the
* process and check the results for levenberg-marquardt using the reference data * process and check the results for levenberg-marquardt using the reference data
* on http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml. Since then i've * on http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml. Since then i've
* carefully checked that the same results were obtained when modifiying the * carefully checked that the same results were obtained when modifying the
* code. Please note that we do not always get the exact same decimals as they do, * code. Please note that we do not always get the exact same decimals as they do,
* but this is ok : they use 128bits float, and we do the tests using the C type 'double', * but this is ok : they use 128bits float, and we do the tests using the C type 'double',
* which is 64 bits on most platforms (x86 and amd64, at least). * which is 64 bits on most platforms (x86 and amd64, at least).

View File

@ -25,7 +25,7 @@ namespace Eigen {
* *
* This module provides wrapper functions for a couple of OpenGL functions * This module provides wrapper functions for a couple of OpenGL functions
* which simplify the way to pass Eigen's object to openGL. * which simplify the way to pass Eigen's object to openGL.
* Here is an exmaple: * Here is an example:
* *
* \code * \code
* // You need to add path_to_eigen/unsupported to your include path. * // You need to add path_to_eigen/unsupported to your include path.

View File

@ -29,6 +29,8 @@ namespace Eigen {
* - erfc * - erfc
* - lgamma * - lgamma
* - igamma * - igamma
* - igamma_der_a
* - gamma_sample_der_alpha
* - igammac * - igammac
* - digamma * - digamma
* - polygamma * - polygamma

View File

@ -170,7 +170,7 @@ private:
typedef internal::vector_int_pair<Scalar, Dim> VIPair; typedef internal::vector_int_pair<Scalar, Dim> VIPair;
typedef std::vector<VIPair, aligned_allocator<VIPair> > VIPairList; typedef std::vector<VIPair, aligned_allocator<VIPair> > VIPairList;
typedef Matrix<Scalar, Dim, 1> VectorType; typedef Matrix<Scalar, Dim, 1> VectorType;
struct VectorComparator //compares vectors, or, more specificall, VIPairs along a particular dimension struct VectorComparator //compares vectors, or more specifically, VIPairs along a particular dimension
{ {
VectorComparator(int inDim) : dim(inDim) {} VectorComparator(int inDim) : dim(inDim) {}
inline bool operator()(const VIPair &v1, const VIPair &v2) const { return v1.first[dim] < v2.first[dim]; } inline bool operator()(const VIPair &v1, const VIPair &v2) const { return v1.first[dim] < v2.first[dim]; }

View File

@ -300,7 +300,7 @@ public:
/** \brief Reports whether previous computation was successful. /** \brief Reports whether previous computation was successful.
* *
* \returns \c Success if computation was succesful, \c NoConvergence otherwise. * \returns \c Success if computation was successful, \c NoConvergence otherwise.
*/ */
ComputationInfo info() const ComputationInfo info() const
{ {

View File

@ -12,7 +12,7 @@
namespace Eigen namespace Eigen
{ {
// Forward declerations // Forward declarations
template <typename _Scalar, class _System> template <typename _Scalar, class _System>
class EulerAngles; class EulerAngles;

View File

@ -99,7 +99,7 @@ void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV)
/** \ingroup IterativeSolvers_Module /** \ingroup IterativeSolvers_Module
* Constrained conjugate gradient * Constrained conjugate gradient
* *
* Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the contraint \f$ Cx \le f \f$ * Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the constraint \f$ Cx \le f \f$
*/ */
template<typename TMatrix, typename CMatrix, template<typename TMatrix, typename CMatrix,
typename VectorX, typename VectorB, typename VectorF> typename VectorX, typename VectorB, typename VectorF>

View File

@ -39,7 +39,6 @@ template <typename VectorType, typename IndexType>
void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut) void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut)
{ {
eigen_assert(vec.size() == perm.size()); eigen_assert(vec.size() == perm.size());
typedef typename IndexType::Scalar Index;
bool flag; bool flag;
for (Index k = 0; k < ncut; k++) for (Index k = 0; k < ncut; k++)
{ {
@ -112,7 +111,6 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
using Base::_solve_impl; using Base::_solve_impl;
typedef _MatrixType MatrixType; typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef typename MatrixType::StorageIndex StorageIndex; typedef typename MatrixType::StorageIndex StorageIndex;
typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::RealScalar RealScalar;
typedef _Preconditioner Preconditioner; typedef _Preconditioner Preconditioner;
@ -146,7 +144,7 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
void _solve_with_guess_impl(const Rhs& b, Dest& x) const void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{ {
bool failed = false; bool failed = false;
for(int j=0; j<b.cols(); ++j) for(Index j=0; j<b.cols(); ++j)
{ {
m_iterations = Base::maxIterations(); m_iterations = Base::maxIterations();
m_error = Base::m_tolerance; m_error = Base::m_tolerance;
@ -170,17 +168,17 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
/** /**
* Get the restart value * Get the restart value
*/ */
int restart() { return m_restart; } Index restart() { return m_restart; }
/** /**
* Set the restart value (default is 30) * Set the restart value (default is 30)
*/ */
void set_restart(const int restart) { m_restart=restart; } void set_restart(const Index restart) { m_restart=restart; }
/** /**
* Set the number of eigenvalues to deflate at each restart * Set the number of eigenvalues to deflate at each restart
*/ */
void setEigenv(const int neig) void setEigenv(const Index neig)
{ {
m_neig = neig; m_neig = neig;
if (neig+1 > m_maxNeig) m_maxNeig = neig+1; // To allow for complex conjugates if (neig+1 > m_maxNeig) m_maxNeig = neig+1; // To allow for complex conjugates
@ -189,12 +187,12 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
/** /**
* Get the size of the deflation subspace size * Get the size of the deflation subspace size
*/ */
int deflSize() {return m_r; } Index deflSize() {return m_r; }
/** /**
* Set the maximum size of the deflation subspace * Set the maximum size of the deflation subspace
*/ */
void setMaxEigenv(const int maxNeig) { m_maxNeig = maxNeig; } void setMaxEigenv(const Index maxNeig) { m_maxNeig = maxNeig; }
protected: protected:
// DGMRES algorithm // DGMRES algorithm
@ -202,27 +200,27 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
void dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, const Preconditioner& precond) const; void dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, const Preconditioner& precond) const;
// Perform one cycle of GMRES // Perform one cycle of GMRES
template<typename Dest> template<typename Dest>
int dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const; Index dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const;
// Compute data to use for deflation // Compute data to use for deflation
int dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const; Index dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const;
// Apply deflation to a vector // Apply deflation to a vector
template<typename RhsType, typename DestType> template<typename RhsType, typename DestType>
int dgmresApplyDeflation(const RhsType& In, DestType& Out) const; Index dgmresApplyDeflation(const RhsType& In, DestType& Out) const;
ComplexVector schurValues(const ComplexSchur<DenseMatrix>& schurofH) const; ComplexVector schurValues(const ComplexSchur<DenseMatrix>& schurofH) const;
ComplexVector schurValues(const RealSchur<DenseMatrix>& schurofH) const; ComplexVector schurValues(const RealSchur<DenseMatrix>& schurofH) const;
// Init data for deflation // Init data for deflation
void dgmresInitDeflation(Index& rows) const; void dgmresInitDeflation(Index& rows) const;
mutable DenseMatrix m_V; // Krylov basis vectors mutable DenseMatrix m_V; // Krylov basis vectors
mutable DenseMatrix m_H; // Hessenberg matrix mutable DenseMatrix m_H; // Hessenberg matrix
mutable DenseMatrix m_Hes; // Initial hessenberg matrix wihout Givens rotations applied mutable DenseMatrix m_Hes; // Initial hessenberg matrix without Givens rotations applied
mutable Index m_restart; // Maximum size of the Krylov subspace mutable Index m_restart; // Maximum size of the Krylov subspace
mutable DenseMatrix m_U; // Vectors that form the basis of the invariant subspace mutable DenseMatrix m_U; // Vectors that form the basis of the invariant subspace
mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles) mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles)
mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */ mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */
mutable PartialPivLU<DenseMatrix> m_luT; // LU factorization of m_T mutable PartialPivLU<DenseMatrix> m_luT; // LU factorization of m_T
mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart
mutable int m_r; // Current number of deflated eigenvalues, size of m_U mutable Index m_r; // Current number of deflated eigenvalues, size of m_U
mutable int m_maxNeig; // Maximum number of eigenvalues to deflate mutable Index m_maxNeig; // Maximum number of eigenvalues to deflate
mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A
mutable bool m_isDeflAllocated; mutable bool m_isDeflAllocated;
mutable bool m_isDeflInitialized; mutable bool m_isDeflInitialized;
@ -244,13 +242,13 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh
const Preconditioner& precond) const const Preconditioner& precond) const
{ {
//Initialization //Initialization
int n = mat.rows(); Index n = mat.rows();
DenseVector r0(n); DenseVector r0(n);
int nbIts = 0; Index nbIts = 0;
m_H.resize(m_restart+1, m_restart); m_H.resize(m_restart+1, m_restart);
m_Hes.resize(m_restart, m_restart); m_Hes.resize(m_restart, m_restart);
m_V.resize(n,m_restart+1); m_V.resize(n,m_restart+1);
//Initial residual vector and intial norm //Initial residual vector and initial norm
x = precond.solve(x); x = precond.solve(x);
r0 = rhs - mat * x; r0 = rhs - mat * x;
RealScalar beta = r0.norm(); RealScalar beta = r0.norm();
@ -284,7 +282,7 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh
*/ */
template< typename _MatrixType, typename _Preconditioner> template< typename _MatrixType, typename _Preconditioner>
template<typename Dest> template<typename Dest>
int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const Index DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const
{ {
//Initialization //Initialization
DenseVector g(m_restart+1); // Right hand side of the least square problem DenseVector g(m_restart+1); // Right hand side of the least square problem
@ -293,8 +291,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
m_V.col(0) = r0/beta; m_V.col(0) = r0/beta;
m_info = NoConvergence; m_info = NoConvergence;
std::vector<JacobiRotation<Scalar> >gr(m_restart); // Givens rotations std::vector<JacobiRotation<Scalar> >gr(m_restart); // Givens rotations
int it = 0; // Number of inner iterations Index it = 0; // Number of inner iterations
int n = mat.rows(); Index n = mat.rows();
DenseVector tv1(n), tv2(n); //Temporary vectors DenseVector tv1(n), tv2(n); //Temporary vectors
while (m_info == NoConvergence && it < m_restart && nbIts < m_iterations) while (m_info == NoConvergence && it < m_restart && nbIts < m_iterations)
{ {
@ -312,7 +310,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
// Orthogonalize it with the previous basis in the basis using modified Gram-Schmidt // Orthogonalize it with the previous basis in the basis using modified Gram-Schmidt
Scalar coef; Scalar coef;
for (int i = 0; i <= it; ++i) for (Index i = 0; i <= it; ++i)
{ {
coef = tv1.dot(m_V.col(i)); coef = tv1.dot(m_V.col(i));
tv1 = tv1 - coef * m_V.col(i); tv1 = tv1 - coef * m_V.col(i);
@ -328,7 +326,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
// FIXME Check for happy breakdown // FIXME Check for happy breakdown
// Update Hessenberg matrix with Givens rotations // Update Hessenberg matrix with Givens rotations
for (int i = 1; i <= it; ++i) for (Index i = 1; i <= it; ++i)
{ {
m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint()); m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint());
} }
@ -418,7 +416,7 @@ inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_Matr
} }
template< typename _MatrixType, typename _Preconditioner> template< typename _MatrixType, typename _Preconditioner>
int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const Index DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const
{ {
// First, find the Schur form of the Hessenberg matrix H // First, find the Schur form of the Hessenberg matrix H
typename internal::conditional<NumTraits<Scalar>::IsComplex, ComplexSchur<DenseMatrix>, RealSchur<DenseMatrix> >::type schurofH; typename internal::conditional<NumTraits<Scalar>::IsComplex, ComplexSchur<DenseMatrix>, RealSchur<DenseMatrix> >::type schurofH;
@ -433,8 +431,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
// Reorder the absolute values of Schur values // Reorder the absolute values of Schur values
DenseRealVector modulEig(it); DenseRealVector modulEig(it);
for (int j=0; j<it; ++j) modulEig(j) = std::abs(eig(j)); for (Index j=0; j<it; ++j) modulEig(j) = std::abs(eig(j));
perm.setLinSpaced(it,0,it-1); perm.setLinSpaced(it,0,internal::convert_index<StorageIndex>(it-1));
internal::sortWithPermutation(modulEig, perm, neig); internal::sortWithPermutation(modulEig, perm, neig);
if (!m_lambdaN) if (!m_lambdaN)
@ -442,7 +440,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
m_lambdaN = (std::max)(modulEig.maxCoeff(), m_lambdaN); m_lambdaN = (std::max)(modulEig.maxCoeff(), m_lambdaN);
} }
//Count the real number of extracted eigenvalues (with complex conjugates) //Count the real number of extracted eigenvalues (with complex conjugates)
int nbrEig = 0; Index nbrEig = 0;
while (nbrEig < neig) while (nbrEig < neig)
{ {
if(eig(perm(it-nbrEig-1)).imag() == RealScalar(0)) nbrEig++; if(eig(perm(it-nbrEig-1)).imag() == RealScalar(0)) nbrEig++;
@ -451,7 +449,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
// Extract the Schur vectors corresponding to the smallest Ritz values // Extract the Schur vectors corresponding to the smallest Ritz values
DenseMatrix Sr(it, nbrEig); DenseMatrix Sr(it, nbrEig);
Sr.setZero(); Sr.setZero();
for (int j = 0; j < nbrEig; j++) for (Index j = 0; j < nbrEig; j++)
{ {
Sr.col(j) = schurofH.matrixU().col(perm(it-j-1)); Sr.col(j) = schurofH.matrixU().col(perm(it-j-1));
} }
@ -462,8 +460,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
if (m_r) if (m_r)
{ {
// Orthogonalize X against m_U using modified Gram-Schmidt // Orthogonalize X against m_U using modified Gram-Schmidt
for (int j = 0; j < nbrEig; j++) for (Index j = 0; j < nbrEig; j++)
for (int k =0; k < m_r; k++) for (Index k =0; k < m_r; k++)
X.col(j) = X.col(j) - (m_U.col(k).dot(X.col(j)))*m_U.col(k); X.col(j) = X.col(j) - (m_U.col(k).dot(X.col(j)))*m_U.col(k);
} }
@ -473,7 +471,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
dgmresInitDeflation(m); dgmresInitDeflation(m);
DenseMatrix MX(m, nbrEig); DenseMatrix MX(m, nbrEig);
DenseVector tv1(m); DenseVector tv1(m);
for (int j = 0; j < nbrEig; j++) for (Index j = 0; j < nbrEig; j++)
{ {
tv1 = mat * X.col(j); tv1 = mat * X.col(j);
MX.col(j) = precond.solve(tv1); MX.col(j) = precond.solve(tv1);
@ -488,8 +486,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
} }
// Save X into m_U and m_MX in m_MU // Save X into m_U and m_MX in m_MU
for (int j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j); for (Index j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j);
for (int j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j); for (Index j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j);
// Increase the size of the invariant subspace // Increase the size of the invariant subspace
m_r += nbrEig; m_r += nbrEig;
@ -502,7 +500,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
} }
template<typename _MatrixType, typename _Preconditioner> template<typename _MatrixType, typename _Preconditioner>
template<typename RhsType, typename DestType> template<typename RhsType, typename DestType>
int DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const Index DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
{ {
DenseVector x1 = m_U.leftCols(m_r).transpose() * x; DenseVector x1 = m_U.leftCols(m_r).transpose() * x;
y = x + m_U.leftCols(m_r) * ( m_lambdaN * m_luT.solve(x1) - x1); y = x + m_U.leftCols(m_r) * ( m_lambdaN * m_luT.solve(x1) - x1);

View File

@ -73,7 +73,7 @@ void lmqrsolv(
qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj; qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj;
wa[k] = temp; wa[k] = temp;
/* accumulate the tranformation in the row of s. */ /* accumulate the transformation in the row of s. */
for (i = k+1; i<n; ++i) { for (i = k+1; i<n; ++i) {
temp = givens.c() * s(i,k) + givens.s() * sdiag[i]; temp = givens.c() * s(i,k) + givens.s() * sdiag[i];
sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i]; sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i];

View File

@ -233,9 +233,9 @@ class LevenbergMarquardt : internal::no_assignment_operator
/** /**
* \brief Reports whether the minimization was successful * \brief Reports whether the minimization was successful
* \returns \c Success if the minimization was succesful, * \returns \c Success if the minimization was successful,
* \c NumericalIssue if a numerical problem arises during the * \c NumericalIssue if a numerical problem arises during the
* minimization process, for exemple during the QR factorization * minimization process, for example during the QR factorization
* \c NoConvergence if the minimization did not converge after * \c NoConvergence if the minimization did not converge after
* the maximum number of function evaluation allowed * the maximum number of function evaluation allowed
* \c InvalidInput if the input matrix is invalid * \c InvalidInput if the input matrix is invalid

View File

@ -313,7 +313,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
matrix_exp_pade17(A, U, V); matrix_exp_pade17(A, U, V);
} }
#elif LDBL_MANT_DIG <= 112 // quadruple precison #elif LDBL_MANT_DIG <= 112 // quadruple precision
if (l1norm < 1.639394610288918690547467954466970e-005L) { if (l1norm < 1.639394610288918690547467954466970e-005L) {
matrix_exp_pade3(arg, U, V); matrix_exp_pade3(arg, U, V);

View File

@ -81,7 +81,7 @@ class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParen
* *
* \note Currently this class is only used by MatrixPower. One may * \note Currently this class is only used by MatrixPower. One may
* insist that this be nested into MatrixPower. This class is here to * insist that this be nested into MatrixPower. This class is here to
* faciliate future development of triangular matrix functions. * facilitate future development of triangular matrix functions.
*/ */
template<typename MatrixType> template<typename MatrixType>
class MatrixPowerAtomic : internal::noncopyable class MatrixPowerAtomic : internal::noncopyable

View File

@ -61,7 +61,7 @@ void qrsolv(
qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj; qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj;
wa[k] = temp; wa[k] = temp;
/* accumulate the tranformation in the row of s. */ /* accumulate the transformation in the row of s. */
for (i = k+1; i<n; ++i) { for (i = k+1; i<n; ++i) {
temp = givens.c() * s(i,k) + givens.s() * sdiag[i]; temp = givens.c() * s(i,k) + givens.s() * sdiag[i];
sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i]; sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i];

View File

@ -22,7 +22,7 @@ void r1updt(
Scalar temp; Scalar temp;
JacobiRotation<Scalar> givens; JacobiRotation<Scalar> givens;
// r1updt had a broader usecase, but we dont use it here. And, more // r1updt had a broader usecase, but we don't use it here. And, more
// importantly, we can not test it. // importantly, we can not test it.
eigen_assert(m==n); eigen_assert(m==n);
eigen_assert(u.size()==m); eigen_assert(u.size()==m);

View File

@ -104,7 +104,7 @@ class companion
/** Helper function for the balancing algorithm. /** Helper function for the balancing algorithm.
* \returns true if the row and the column, having colNorm and rowNorm * \returns true if the row and the column, having colNorm and rowNorm
* as norms, are balanced, false otherwise. * as norms, are balanced, false otherwise.
* colB and rowB are repectively the multipliers for * colB and rowB are respectively the multipliers for
* the column and the row in order to balance them. * the column and the row in order to balance them.
* */ * */
bool balanced( RealScalar colNorm, RealScalar rowNorm, bool balanced( RealScalar colNorm, RealScalar rowNorm,
@ -113,7 +113,7 @@ class companion
/** Helper function for the balancing algorithm. /** Helper function for the balancing algorithm.
* \returns true if the row and the column, having colNorm and rowNorm * \returns true if the row and the column, having colNorm and rowNorm
* as norms, are balanced, false otherwise. * as norms, are balanced, false otherwise.
* colB and rowB are repectively the multipliers for * colB and rowB are respectively the multipliers for
* the column and the row in order to balance them. * the column and the row in order to balance them.
* */ * */
bool balancedR( RealScalar colNorm, RealScalar rowNorm, bool balancedR( RealScalar colNorm, RealScalar rowNorm,

View File

@ -41,7 +41,7 @@ public:
/** Sets the relative threshold value used to prune zero coefficients during the decomposition. /** Sets the relative threshold value used to prune zero coefficients during the decomposition.
* *
* Setting a value greater than zero speeds up computation, and yields to an imcomplete * Setting a value greater than zero speeds up computation, and yields to an incomplete
* factorization with fewer non zero coefficients. Such approximate factors are especially * factorization with fewer non zero coefficients. Such approximate factors are especially
* useful to initialize an iterative solver. * useful to initialize an iterative solver.
* *

View File

@ -206,26 +206,26 @@ public:
if (col > row) //upper matrix if (col > row) //upper matrix
{ {
const Index minOuterIndex = inner - m_data.upperProfile(inner); const Index minOuterIndex = inner - m_data.upperProfile(inner);
eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage"); eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner)));
} }
if (col < row) //lower matrix if (col < row) //lower matrix
{ {
const Index minInnerIndex = outer - m_data.lowerProfile(outer); const Index minInnerIndex = outer - m_data.lowerProfile(outer);
eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage"); eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer)));
} }
} else { } else {
if (outer > inner) //upper matrix if (outer > inner) //upper matrix
{ {
const Index maxOuterIndex = inner + m_data.upperProfile(inner); const Index maxOuterIndex = inner + m_data.upperProfile(inner);
eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage"); eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); return this->m_data.upper(m_colStartIndex[inner] + (outer - inner));
} }
if (outer < inner) //lower matrix if (outer < inner) //lower matrix
{ {
const Index maxInnerIndex = outer + m_data.lowerProfile(outer); const Index maxInnerIndex = outer + m_data.lowerProfile(outer);
eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage"); eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer));
} }
} }
@ -300,11 +300,11 @@ public:
if (IsRowMajor) { if (IsRowMajor) {
const Index minInnerIndex = outer - m_data.lowerProfile(outer); const Index minInnerIndex = outer - m_data.lowerProfile(outer);
eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage"); eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer))); return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer)));
} else { } else {
const Index maxInnerIndex = outer + m_data.lowerProfile(outer); const Index maxInnerIndex = outer + m_data.lowerProfile(outer);
eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage"); eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer)); return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer));
} }
} }
@ -336,11 +336,11 @@ public:
if (IsRowMajor) { if (IsRowMajor) {
const Index minOuterIndex = inner - m_data.upperProfile(inner); const Index minOuterIndex = inner - m_data.upperProfile(inner);
eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage"); eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner))); return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner)));
} else { } else {
const Index maxOuterIndex = inner + m_data.upperProfile(inner); const Index maxOuterIndex = inner + m_data.upperProfile(inner);
eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage"); eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + (outer - inner)); return this->m_data.upper(m_colStartIndex[inner] + (outer - inner));
} }
} }

View File

@ -187,7 +187,7 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
/** Does nothing: provided for compatibility with SparseMatrix */ /** Does nothing: provided for compatibility with SparseMatrix */
inline void finalize() {} inline void finalize() {}
/** Suppress all nonzeros which are smaller than \a reference under the tolerence \a epsilon */ /** Suppress all nonzeros which are smaller than \a reference under the tolerance \a epsilon */
void prune(Scalar reference, RealScalar epsilon = NumTraits<RealScalar>::dummy_precision()) void prune(Scalar reference, RealScalar epsilon = NumTraits<RealScalar>::dummy_precision())
{ {
for (Index j=0; j<outerSize(); ++j) for (Index j=0; j<outerSize(); ++j)
@ -224,21 +224,21 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
} }
} }
/** The class DynamicSparseMatrix is deprectaed */ /** The class DynamicSparseMatrix is deprecated */
EIGEN_DEPRECATED inline DynamicSparseMatrix() EIGEN_DEPRECATED inline DynamicSparseMatrix()
: m_innerSize(0), m_data(0) : m_innerSize(0), m_data(0)
{ {
eigen_assert(innerSize()==0 && outerSize()==0); eigen_assert(innerSize()==0 && outerSize()==0);
} }
/** The class DynamicSparseMatrix is deprectaed */ /** The class DynamicSparseMatrix is deprecated */
EIGEN_DEPRECATED inline DynamicSparseMatrix(Index rows, Index cols) EIGEN_DEPRECATED inline DynamicSparseMatrix(Index rows, Index cols)
: m_innerSize(0) : m_innerSize(0)
{ {
resize(rows, cols); resize(rows, cols);
} }
/** The class DynamicSparseMatrix is deprectaed */ /** The class DynamicSparseMatrix is deprecated */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEPRECATED explicit inline DynamicSparseMatrix(const SparseMatrixBase<OtherDerived>& other) EIGEN_DEPRECATED explicit inline DynamicSparseMatrix(const SparseMatrixBase<OtherDerived>& other)
: m_innerSize(0) : m_innerSize(0)

View File

@ -104,7 +104,7 @@ namespace internal
out << value.real << " " << value.imag()<< "\n"; out << value.real << " " << value.imag()<< "\n";
} }
} // end namepsace internal } // end namespace internal
inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscomplex, bool& isvector) inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscomplex, bool& isvector)
{ {

View File

@ -33,6 +33,48 @@ igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerive
); );
} }
/** \cpp11 \returns an expression of the coefficient-wise igamma_der_a(\a a, \a x) to the given arrays.
*
* This function computes the coefficient-wise derivative of the incomplete
* gamma function with respect to the parameter a.
*
* \note This function supports only float and double scalar types in c++11
* mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations
* of igamma_der_a(T,T) for any scalar
* type T to be supported.
*
* \sa Eigen::igamma(), Eigen::lgamma()
*/
template <typename Derived, typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
igamma_der_a(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) {
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
a.derived(),
x.derived());
}
/** \cpp11 \returns an expression of the coefficient-wise gamma_sample_der_alpha(\a alpha, \a sample) to the given arrays.
*
* This function computes the coefficient-wise derivative of the sample
* of a Gamma(alpha, 1) random variable with respect to the parameter alpha.
*
* \note This function supports only float and double scalar types in c++11
* mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations
* of gamma_sample_der_alpha(T,T) for any scalar
* type T to be supported.
*
* \sa Eigen::igamma(), Eigen::lgamma()
*/
template <typename AlphaDerived, typename SampleDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>
gamma_sample_der_alpha(const Eigen::ArrayBase<AlphaDerived>& alpha, const Eigen::ArrayBase<SampleDerived>& sample) {
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>(
alpha.derived(),
sample.derived());
}
/** \cpp11 \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays. /** \cpp11 \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays.
* *
* This function computes the coefficient-wise complementary incomplete gamma function. * This function computes the coefficient-wise complementary incomplete gamma function.
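A short usage sketch of the igamma_der_a and gamma_sample_der_alpha array functions added above (the umbrella header name is assumed; C++11 and float/double scalars are required, as the doc comments state):

  #include <Eigen/Core>
  #include <unsupported/Eigen/SpecialFunctions>
  #include <iostream>

  int main() {
    Eigen::ArrayXd a(3), x(3);
    a << 0.5, 1.0, 2.0;
    x << 0.5, 1.0, 2.0;
    // d/da of the incomplete gamma function, evaluated coefficient-wise
    Eigen::ArrayXd da = Eigen::igamma_der_a(a, x);
    // d/dalpha of a Gamma(alpha, 1) sample, with x playing the role of the sample
    Eigen::ArrayXd ds = Eigen::gamma_sample_der_alpha(a, x);
    std::cout << da.transpose() << "\n" << ds.transpose() << "\n";
  }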

View File

@ -41,6 +41,60 @@ struct functor_traits<scalar_igamma_op<Scalar> > {
}; };
}; };
/** \internal
* \brief Template functor to compute the derivative of the incomplete gamma
* function igamma_der_a(a, x)
*
* \sa class CwiseBinaryOp, Cwise::igamma_der_a
*/
template <typename Scalar>
struct scalar_igamma_der_a_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_der_a_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a, const Scalar& x) const {
using numext::igamma_der_a;
return igamma_der_a(a, x);
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const {
return internal::pigamma_der_a(a, x);
}
};
template <typename Scalar>
struct functor_traits<scalar_igamma_der_a_op<Scalar> > {
enum {
// 2x the cost of igamma
Cost = 40 * NumTraits<Scalar>::MulCost + 20 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasIGammaDerA
};
};
/** \internal
* \brief Template functor to compute the derivative of the sample
* of a Gamma(alpha, 1) random variable with respect to the parameter alpha
* gamma_sample_der_alpha(alpha, sample)
*
* \sa class CwiseBinaryOp, Cwise::gamma_sample_der_alpha
*/
template <typename Scalar>
struct scalar_gamma_sample_der_alpha_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_gamma_sample_der_alpha_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& alpha, const Scalar& sample) const {
using numext::gamma_sample_der_alpha;
return gamma_sample_der_alpha(alpha, sample);
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& alpha, const Packet& sample) const {
return internal::pgamma_sample_der_alpha(alpha, sample);
}
};
template <typename Scalar>
struct functor_traits<scalar_gamma_sample_der_alpha_op<Scalar> > {
enum {
// 2x the cost of igamma, minus the lgamma cost (the lgamma cancels out)
Cost = 30 * NumTraits<Scalar>::MulCost + 15 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasGammaSampleDerAlpha
};
};
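These functors are normally reached through the array API shown earlier; wiring one into a CwiseBinaryOp by hand looks roughly like the sketch below (with the SpecialFunctions module included; not the recommended entry point):

typedef Eigen::internal::scalar_igamma_der_a_op<double> Op;
Eigen::ArrayXd a(2), x(2);
a << 1.0, 2.0;
x << 0.5, 1.5;
// Equivalent to Eigen::igamma_der_a(a, x).
Eigen::ArrayXd res = Eigen::CwiseBinaryOp<Op, const Eigen::ArrayXd, const Eigen::ArrayXd>(a, x);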
/** \internal /** \internal
* \brief Template functor to compute the complementary incomplete gamma function igammac(a, x) * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x)

View File

@ -33,6 +33,14 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::h
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) { template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x))); return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
} }
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma_der_a(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igamma_der_a(static_cast<float>(a), static_cast<float>(x)));
}
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half gamma_sample_der_alpha(const Eigen::half& alpha, const Eigen::half& sample) {
return Eigen::half(Eigen::numext::gamma_sample_der_alpha(static_cast<float>(alpha), static_cast<float>(sample)));
}
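With these forwarding specializations, half-precision arguments are promoted to float for the computation and the result is cast back. A sketch (illustrative values; assumes the usual numext entry point):

Eigen::half a(2.0f), x(1.0f);
Eigen::half d = Eigen::numext::igamma_der_a(a, x);  // evaluated in float internally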
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) { template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x))); return Eigen::half(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x)));
} }

View File

@ -521,6 +521,197 @@ struct cephes_helper<double> {
} }
}; };
enum IgammaComputationMode { VALUE, DERIVATIVE, SAMPLE_DERIVATIVE };
template <typename Scalar, IgammaComputationMode mode>
EIGEN_DEVICE_FUNC
int igamma_num_iterations() {
/* Returns the maximum number of internal iterations for igamma computation.
*/
if (mode == VALUE) {
return 2000;
}
if (internal::is_same<Scalar, float>::value) {
return 200;
} else if (internal::is_same<Scalar, double>::value) {
return 500;
} else {
return 2000;
}
}
template <typename Scalar, IgammaComputationMode mode>
struct igammac_cf_impl {
/* Computes igamc(a, x) or derivative (depending on the mode)
* using the continued fraction expansion of the complementary
* incomplete Gamma function.
*
* Preconditions:
* a > 0
* x >= 1
* x >= a
*/
EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) {
const Scalar zero = 0;
const Scalar one = 1;
const Scalar two = 2;
const Scalar machep = cephes_helper<Scalar>::machep();
const Scalar big = cephes_helper<Scalar>::big();
const Scalar biginv = cephes_helper<Scalar>::biginv();
if ((numext::isinf)(x)) {
return zero;
}
// continued fraction
Scalar y = one - a;
Scalar z = x + y + one;
Scalar c = zero;
Scalar pkm2 = one;
Scalar qkm2 = x;
Scalar pkm1 = x + one;
Scalar qkm1 = z * x;
Scalar ans = pkm1 / qkm1;
Scalar dpkm2_da = zero;
Scalar dqkm2_da = zero;
Scalar dpkm1_da = zero;
Scalar dqkm1_da = -x;
Scalar dans_da = (dpkm1_da - ans * dqkm1_da) / qkm1;
for (int i = 0; i < igamma_num_iterations<Scalar, mode>(); i++) {
c += one;
y += one;
z += two;
Scalar yc = y * c;
Scalar pk = pkm1 * z - pkm2 * yc;
Scalar qk = qkm1 * z - qkm2 * yc;
Scalar dpk_da = dpkm1_da * z - pkm1 - dpkm2_da * yc + pkm2 * c;
Scalar dqk_da = dqkm1_da * z - qkm1 - dqkm2_da * yc + qkm2 * c;
if (qk != zero) {
Scalar ans_prev = ans;
ans = pk / qk;
Scalar dans_da_prev = dans_da;
dans_da = (dpk_da - ans * dqk_da) / qk;
if (mode == VALUE) {
if (numext::abs(ans_prev - ans) <= machep * numext::abs(ans)) {
break;
}
} else {
if (numext::abs(dans_da - dans_da_prev) <= machep) {
break;
}
}
}
pkm2 = pkm1;
pkm1 = pk;
qkm2 = qkm1;
qkm1 = qk;
dpkm2_da = dpkm1_da;
dpkm1_da = dpk_da;
dqkm2_da = dqkm1_da;
dqkm1_da = dqk_da;
if (numext::abs(pk) > big) {
pkm2 *= biginv;
pkm1 *= biginv;
qkm2 *= biginv;
qkm1 *= biginv;
dpkm2_da *= biginv;
dpkm1_da *= biginv;
dqkm2_da *= biginv;
dqkm1_da *= biginv;
}
}
/* Compute x**a * exp(-x) / gamma(a) */
Scalar logax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
Scalar dlogax_da = numext::log(x) - digamma_impl<Scalar>::run(a);
Scalar ax = numext::exp(logax);
Scalar dax_da = ax * dlogax_da;
switch (mode) {
case VALUE:
return ans * ax;
case DERIVATIVE:
return ans * dax_da + dans_da * ax;
case SAMPLE_DERIVATIVE:
return -(dans_da + ans * dlogax_da) * x;
}
}
};
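For reference, the quantity evaluated here in VALUE mode is the regularized upper incomplete gamma function; its classical continued-fraction representation is shown below, and the Cephes-style recurrence above evaluates an algebraically equivalent, rescaled form of it:

Q(a,x) \;=\; \frac{\Gamma(a,x)}{\Gamma(a)}
       \;=\; \frac{x^{a}e^{-x}}{\Gamma(a)}\;
             \cfrac{1}{x+1-a-\cfrac{1\cdot(1-a)}{x+3-a-\cfrac{2\cdot(2-a)}{x+5-a-\cdots}}}

In DERIVATIVE and SAMPLE_DERIVATIVE modes the same recurrence is run with d/da of every partial numerator and denominator carried alongside (forward-mode differentiation), which is what the *_da variables implement.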
template <typename Scalar, IgammaComputationMode mode>
struct igamma_series_impl {
/* Computes igam(a, x) or its derivative (depending on the mode)
* using the series expansion of the incomplete Gamma function.
*
* Preconditions:
* x > 0
* a > 0
* !(x > 1 && x > a)
*/
EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) {
const Scalar zero = 0;
const Scalar one = 1;
const Scalar machep = cephes_helper<Scalar>::machep();
/* power series */
Scalar r = a;
Scalar c = one;
Scalar ans = one;
Scalar dc_da = zero;
Scalar dans_da = zero;
for (int i = 0; i < igamma_num_iterations<Scalar, mode>(); i++) {
r += one;
Scalar term = x / r;
Scalar dterm_da = -x / (r * r);
dc_da = term * dc_da + dterm_da * c;
dans_da += dc_da;
c *= term;
ans += c;
if (mode == VALUE) {
if (c <= machep * ans) {
break;
}
} else {
if (numext::abs(dc_da) <= machep * numext::abs(dans_da)) {
break;
}
}
}
/* Compute x**a * exp(-x) / gamma(a + 1) */
Scalar logax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a + one);
Scalar dlogax_da = numext::log(x) - digamma_impl<Scalar>::run(a + one);
Scalar ax = numext::exp(logax);
Scalar dax_da = ax * dlogax_da;
switch (mode) {
case VALUE:
return ans * ax;
case DERIVATIVE:
return ans * dax_da + dans_da * ax;
case SAMPLE_DERIVATIVE:
return -(dans_da + ans * dlogax_da) * x / a;
}
}
};
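The series evaluated here in VALUE mode is the standard power-series expansion of the regularized lower incomplete gamma function, restated in LaTeX for reference (it matches the ASCII-art formula quoted further down in the Cephes documentation):

P(a,x) \;=\; \frac{\gamma(a,x)}{\Gamma(a)}
       \;=\; \frac{x^{a}e^{-x}}{\Gamma(a+1)}\sum_{k=0}^{\infty}\frac{x^{k}}{(a+1)(a+2)\cdots(a+k)}
       \;=\; x^{a}e^{-x}\sum_{k=0}^{\infty}\frac{x^{k}}{\Gamma(a+k+1)}

The derivative modes again carry d/da of each term through the same loop.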
#if !EIGEN_HAS_C99_MATH #if !EIGEN_HAS_C99_MATH
template <typename Scalar> template <typename Scalar>
@ -535,8 +726,6 @@ struct igammac_impl {
#else #else
template <typename Scalar> struct igamma_impl; // predeclare igamma_impl
template <typename Scalar> template <typename Scalar>
struct igammac_impl { struct igammac_impl {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -609,92 +798,10 @@ struct igammac_impl {
} }
if ((x < one) || (x < a)) { if ((x < one) || (x < a)) {
/* The checks above ensure that we meet the preconditions for return (one - igamma_series_impl<Scalar, VALUE>::run(a, x));
* igamma_impl::Impl(), so call it, rather than igamma_impl::Run().
* Calling Run() would also work, but in that case the compiler may not be
* able to prove that igammac_impl::Run and igamma_impl::Run are not
* mutually recursive. This leads to worse code, particularly on
* platforms like nvptx, where recursion is allowed only begrudgingly.
*/
return (one - igamma_impl<Scalar>::Impl(a, x));
} }
return Impl(a, x); return igammac_cf_impl<Scalar, VALUE>::run(a, x);
}
private:
/* igamma_impl calls igammac_impl::Impl. */
friend struct igamma_impl<Scalar>;
/* Actually computes igamc(a, x).
*
* Preconditions:
* a > 0
* x >= 1
* x >= a
*/
EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
const Scalar zero = 0;
const Scalar one = 1;
const Scalar two = 2;
const Scalar machep = cephes_helper<Scalar>::machep();
const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
const Scalar big = cephes_helper<Scalar>::big();
const Scalar biginv = cephes_helper<Scalar>::biginv();
const Scalar inf = NumTraits<Scalar>::infinity();
Scalar ans, ax, c, yc, r, t, y, z;
Scalar pk, pkm1, pkm2, qk, qkm1, qkm2;
if (x == inf) return zero; // std::isinf crashes on CUDA
/* Compute x**a * exp(-x) / gamma(a) */
ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
if (ax < -maxlog) { // underflow
return zero;
}
ax = numext::exp(ax);
// continued fraction
y = one - a;
z = x + y + one;
c = zero;
pkm2 = one;
qkm2 = x;
pkm1 = x + one;
qkm1 = z * x;
ans = pkm1 / qkm1;
for (int i = 0; i < 2000; i++) {
c += one;
y += one;
z += two;
yc = y * c;
pk = pkm1 * z - pkm2 * yc;
qk = qkm1 * z - qkm2 * yc;
if (qk != zero) {
r = pk / qk;
t = numext::abs((ans - r) / r);
ans = r;
} else {
t = one;
}
pkm2 = pkm1;
pkm1 = pk;
qkm2 = qkm1;
qkm1 = qk;
if (numext::abs(pk) > big) {
pkm2 *= biginv;
pkm1 *= biginv;
qkm2 *= biginv;
qkm1 *= biginv;
}
if (t <= machep) {
break;
}
}
return (ans * ax);
} }
}; };
@ -704,15 +811,10 @@ struct igammac_impl {
* Implementation of igamma (incomplete gamma integral), based on Cephes but requires C++11/C99 * * Implementation of igamma (incomplete gamma integral), based on Cephes but requires C++11/C99 *
************************************************************************************************/ ************************************************************************************************/
template <typename Scalar>
struct igamma_retval {
typedef Scalar type;
};
#if !EIGEN_HAS_C99_MATH #if !EIGEN_HAS_C99_MATH
template <typename Scalar> template <typename Scalar, IgammaComputationMode mode>
struct igamma_impl { struct igamma_generic_impl {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) { static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
@ -723,14 +825,66 @@ struct igamma_impl {
#else #else
template <typename Scalar> template <typename Scalar, IgammaComputationMode mode>
struct igamma_impl { struct igamma_generic_impl {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) { static Scalar run(Scalar a, Scalar x) {
/* Depending on the mode, returns
* - VALUE: incomplete Gamma function igamma(a, x)
* - DERIVATIVE: derivative of incomplete Gamma function d/da igamma(a, x)
* - SAMPLE_DERIVATIVE: implicit derivative of a Gamma random variable
* x ~ Gamma(x | a, 1), dx/da = -1 / Gamma(x | a, 1) * d igamma(a, x) / dx
*
* Derivatives are implemented by forward-mode differentiation.
*/
const Scalar zero = 0;
const Scalar one = 1;
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
if (x == zero) return zero;
if ((x < zero) || (a <= zero)) { // domain error
return nan;
}
if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans
return nan;
}
if ((x > one) && (x > a)) {
Scalar ret = igammac_cf_impl<Scalar, mode>::run(a, x);
if (mode == VALUE) {
return one - ret;
} else {
return -ret;
}
}
return igamma_series_impl<Scalar, mode>::run(a, x);
}
};
#endif // EIGEN_HAS_C99_MATH
template <typename Scalar>
struct igamma_retval {
typedef Scalar type;
};
template <typename Scalar>
struct igamma_impl : igamma_generic_impl<Scalar, VALUE> {
/* igam() /* igam()
* Incomplete gamma integral * Incomplete gamma integral.
* *
* The CDF of Gamma(a, 1) random variable at the point x.
* *
* Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample
* 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points.
* The ground truth is computed by mpmath. Mean absolute error:
* float: 1.26713e-05
* double: 2.33606e-12
*
* Cephes documentation below.
* *
* SYNOPSIS: * SYNOPSIS:
* *
@ -777,7 +931,6 @@ struct igamma_impl {
Direct inquiries to 30 Frost Street, Cambridge, MA 02140 Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/ */
/* left tail of incomplete gamma function: /* left tail of incomplete gamma function:
* *
* inf. k * inf. k
@ -787,80 +940,73 @@ struct igamma_impl {
* k=0 | (a+k+1) * k=0 | (a+k+1)
* *
*/ */
const Scalar zero = 0;
const Scalar one = 1;
const Scalar nan = NumTraits<Scalar>::quiet_NaN();
if (x == zero) return zero;
if ((x < zero) || (a <= zero)) { // domain error
return nan;
}
if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans
return nan;
}
if ((x > one) && (x > a)) {
/* The checks above ensure that we meet the preconditions for
* igammac_impl::Impl(), so call it, rather than igammac_impl::Run().
* Calling Run() would also work, but in that case the compiler may not be
* able to prove that igammac_impl::Run and igamma_impl::Run are not
* mutually recursive. This leads to worse code, particularly on
* platforms like nvptx, where recursion is allowed only begrudgingly.
*/
return (one - igammac_impl<Scalar>::Impl(a, x));
}
return Impl(a, x);
}
private:
/* igammac_impl calls igamma_impl::Impl. */
friend struct igammac_impl<Scalar>;
/* Actually computes igam(a, x).
*
* Preconditions:
* x > 0
* a > 0
* !(x > 1 && x > a)
*/
EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
const Scalar zero = 0;
const Scalar one = 1;
const Scalar machep = cephes_helper<Scalar>::machep();
const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
Scalar ans, ax, c, r;
/* Compute x**a * exp(-x) / gamma(a) */
ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
if (ax < -maxlog) {
// underflow
return zero;
}
ax = numext::exp(ax);
/* power series */
r = a;
c = one;
ans = one;
for (int i = 0; i < 2000; i++) {
r += one;
c *= x/r;
ans += c;
if (c/ans <= machep) {
break;
}
}
return (ans * ax / a);
}
}; };
#endif // EIGEN_HAS_C99_MATH template <typename Scalar>
struct igamma_der_a_retval : igamma_retval<Scalar> {};
template <typename Scalar>
struct igamma_der_a_impl : igamma_generic_impl<Scalar, DERIVATIVE> {
/* Derivative of the incomplete Gamma function with respect to a.
*
* Computes d/da igamma(a, x) by forward differentiation of the igamma code.
*
* Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample
* 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points.
* The ground truth is computed by mpmath. Mean absolute error:
* float: 6.17992e-07
* double: 4.60453e-12
*
* Reference:
* R. Moore. "Algorithm AS 187: Derivatives of the incomplete gamma
* integral". Journal of the Royal Statistical Society. 1982
*/
};
template <typename Scalar>
struct gamma_sample_der_alpha_retval : igamma_retval<Scalar> {};
template <typename Scalar>
struct gamma_sample_der_alpha_impl
: igamma_generic_impl<Scalar, SAMPLE_DERIVATIVE> {
/* Derivative of a Gamma random variable sample with respect to alpha.
*
* Consider a sample of a Gamma random variable with the concentration
* parameter alpha: sample ~ Gamma(alpha, 1). The reparameterization
* derivative that we want to compute is dsample / dalpha =
* d igammainv(alpha, u) / dalpha, where u = igamma(alpha, sample).
* However, this formula is numerically unstable and expensive, so instead
* we use implicit differentiation:
*
* igamma(alpha, sample) = u, where u ~ Uniform(0, 1).
* Apply d / dalpha to both sides:
* d igamma(alpha, sample) / dalpha
* + d igamma(alpha, sample) / dsample * dsample/dalpha = 0
* d igamma(alpha, sample) / dalpha
* + Gamma(sample | alpha, 1) dsample / dalpha = 0
* dsample/dalpha = - (d igamma(alpha, sample) / dalpha)
* / Gamma(sample | alpha, 1)
*
* Here Gamma(sample | alpha, 1) is the PDF of the Gamma distribution
* (note that the derivative of the CDF w.r.t. sample is the PDF).
* See the reference below for more details.
*
* The derivative of igamma(alpha, sample) is computed by forward
* differentiation of the igamma code. Division by the Gamma PDF is performed
* in the same code, increasing the accuracy and speed due to cancellation
* of some terms.
*
* Accuracy estimation. For each alpha in [10^-2, 10^-1...10^3] we sample
* 50 Gamma random variables sample ~ Gamma(sample | alpha, 1), a total of 300
* points. The ground truth is computed by mpmath. Mean absolute error:
* float: 2.1686e-06
* double: 1.4774e-12
*
* Reference:
* M. Figurnov, S. Mohamed, A. Mnih "Implicit Reparameterization Gradients".
* 2018
*/
};
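The implicit-differentiation identity described in the comment above, written out compactly (P is the regularized lower incomplete gamma function, i.e. the Gamma(alpha, 1) CDF, and its derivative with respect to x is the Gamma PDF):

P(\alpha, x) = u,\quad u \sim \mathrm{Uniform}(0,1)
\;\Longrightarrow\;
\frac{\partial P(\alpha,x)}{\partial \alpha}
  + \underbrace{\frac{x^{\alpha-1}e^{-x}}{\Gamma(\alpha)}}_{\partial P/\partial x}\,\frac{dx}{d\alpha} = 0
\;\Longrightarrow\;
\frac{dx}{d\alpha} = -\,\frac{\partial P(\alpha,x)/\partial\alpha}{x^{\alpha-1}e^{-x}/\Gamma(\alpha)}.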
/***************************************************************************** /*****************************************************************************
* Implementation of Riemann zeta function of two arguments, based on Cephes * * Implementation of Riemann zeta function of two arguments, based on Cephes *
@ -1574,6 +1720,8 @@ struct betainc_impl<double> {
} }
}; };
#endif // EIGEN_HAS_C99_MATH
/**************************************************************************** /****************************************************************************
* Implementation of Bessel function, based on Cephes * * Implementation of Bessel function, based on Cephes *
****************************************************************************/ ****************************************************************************/
@ -1902,8 +2050,6 @@ struct i1e_impl<double> {
} }
}; };
#endif // EIGEN_HAS_C99_MATH
} // end namespace internal } // end namespace internal
namespace numext { namespace numext {
@ -1950,6 +2096,18 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x); return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x);
} }
template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma_der_a, Scalar)
igamma_der_a(const Scalar& a, const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(igamma_der_a, Scalar)::run(a, x);
}
template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(gamma_sample_der_alpha, Scalar)
gamma_sample_der_alpha(const Scalar& a, const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(gamma_sample_der_alpha, Scalar)::run(a, x);
}
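At the scalar level the new functions are reachable through numext; a sketch with illustrative arguments:

double d1 = Eigen::numext::igamma_der_a(2.0, 1.5);            // d/da igamma(a, x) at a = 2, x = 1.5
double d2 = Eigen::numext::gamma_sample_der_alpha(2.0, 1.5);  // d(sample)/d(alpha) for sample = 1.5 ~ Gamma(2, 1)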
template <typename Scalar> template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar) EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar)
igammac(const Scalar& a, const Scalar& x) { igammac(const Scalar& a, const Scalar& x) {

View File

@ -42,6 +42,21 @@ Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); } Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
/** \internal \returns the derivative of the incomplete gamma function
* igamma_der_a(\a a, \a x) */
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma_der_a(const Packet& a, const Packet& x) {
using numext::igamma_der_a; return igamma_der_a(a, x);
}
/** \internal \returns the derivative of a sample
 * of a Gamma(alpha, 1) random variable with respect to the parameter alpha,
 * gamma_sample_der_alpha(\a alpha, \a sample) */
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pgamma_sample_der_alpha(const Packet& alpha, const Packet& sample) {
using numext::gamma_sample_der_alpha; return gamma_sample_der_alpha(alpha, sample);
}
/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */ /** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }

View File

@ -120,6 +120,41 @@ double2 pigamma<double2>(const double2& a, const double2& x)
return make_double2(igamma(a.x, x.x), igamma(a.y, x.y)); return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
} }
template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma_der_a<float4>(
const float4& a, const float4& x) {
using numext::igamma_der_a;
return make_float4(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y),
igamma_der_a(a.z, x.z), igamma_der_a(a.w, x.w));
}
template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
pigamma_der_a<double2>(const double2& a, const double2& x) {
using numext::igamma_der_a;
return make_double2(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y));
}
template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pgamma_sample_der_alpha<float4>(
const float4& alpha, const float4& sample) {
using numext::gamma_sample_der_alpha;
return make_float4(
gamma_sample_der_alpha(alpha.x, sample.x),
gamma_sample_der_alpha(alpha.y, sample.y),
gamma_sample_der_alpha(alpha.z, sample.z),
gamma_sample_der_alpha(alpha.w, sample.w));
}
template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
pgamma_sample_der_alpha<double2>(const double2& alpha, const double2& sample) {
using numext::gamma_sample_der_alpha;
return make_double2(
gamma_sample_der_alpha(alpha.x, sample.x),
gamma_sample_der_alpha(alpha.y, sample.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pigammac<float4>(const float4& a, const float4& x) float4 pigammac<float4>(const float4& a, const float4& x)
{ {

View File

@ -181,7 +181,7 @@ namespace Eigen
* \ingroup Splines_Module * \ingroup Splines_Module
* *
* \param[in] pts The data points to which a spline should be fit. * \param[in] pts The data points to which a spline should be fit.
* \param[out] chord_lengths The resulting chord lenggth vector. * \param[out] chord_lengths The resulting chord length vector.
* *
* \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data * \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data
**/ **/

View File

@ -20,7 +20,7 @@ However, it:
- must rely on Eigen, - must rely on Eigen,
- must be highly related to math, - must be highly related to math,
- should have some general purpose in the sense that it could - should have some general purpose in the sense that it could
potentially become an offical Eigen module (or be merged into another one). potentially become an official Eigen module (or be merged into another one).
In doubt feel free to contact us. For instance, if your addons is very too specific In doubt feel free to contact us. For instance, if your addons is very too specific
but it shows an interesting way of using Eigen, then it could be a nice demo. but it shows an interesting way of using Eigen, then it could be a nice demo.

View File

@ -70,7 +70,7 @@ void bench_svd(const MatrixType& a = MatrixType())
std::cout<< std::endl; std::cout<< std::endl;
timerJacobi.reset(); timerJacobi.reset();
timerBDC.reset(); timerBDC.reset();
cout << " Computes rotaion matrix" <<endl; cout << " Computes rotation matrix" <<endl;
for (int k=1; k<=NUMBER_SAMPLE; ++k) for (int k=1; k<=NUMBER_SAMPLE; ++k)
{ {
timerBDC.start(); timerBDC.start();

View File

@ -81,7 +81,7 @@ void check_limits_specialization()
typedef std::numeric_limits<AD> A; typedef std::numeric_limits<AD> A;
typedef std::numeric_limits<Scalar> B; typedef std::numeric_limits<Scalar> B;
// workaround "unsed typedef" warning: // workaround "unused typedef" warning:
VERIFY(!bool(internal::is_same<B, A>::value)); VERIFY(!bool(internal::is_same<B, A>::value));
#if EIGEN_HAS_CXX11 #if EIGEN_HAS_CXX11

View File

@ -180,6 +180,64 @@ static void test_fixed_size_broadcasting()
#endif #endif
} }
template <int DataLayout>
static void test_simple_broadcasting_one_by_n()
{
Tensor<float, 4, DataLayout> tensor(1,13,5,7);
tensor.setRandom();
array<ptrdiff_t, 4> broadcasts;
broadcasts[0] = 9;
broadcasts[1] = 1;
broadcasts[2] = 1;
broadcasts[3] = 1;
Tensor<float, 4, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 9);
VERIFY_IS_EQUAL(broadcast.dimension(1), 13);
VERIFY_IS_EQUAL(broadcast.dimension(2), 5);
VERIFY_IS_EQUAL(broadcast.dimension(3), 7);
for (int i = 0; i < 9; ++i) {
for (int j = 0; j < 13; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i%1,j%13,k%5,l%7), broadcast(i,j,k,l));
}
}
}
}
}
template <int DataLayout>
static void test_simple_broadcasting_n_by_one()
{
Tensor<float, 4, DataLayout> tensor(7,3,5,1);
tensor.setRandom();
array<ptrdiff_t, 4> broadcasts;
broadcasts[0] = 1;
broadcasts[1] = 1;
broadcasts[2] = 1;
broadcasts[3] = 19;
Tensor<float, 4, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), 7);
VERIFY_IS_EQUAL(broadcast.dimension(1), 3);
VERIFY_IS_EQUAL(broadcast.dimension(2), 5);
VERIFY_IS_EQUAL(broadcast.dimension(3), 19);
for (int i = 0; i < 7; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 19; ++l) {
VERIFY_IS_EQUAL(tensor(i%7,j%3,k%5,l%1), broadcast(i,j,k,l));
}
}
}
}
}
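The invariant both new tests check can be stated compactly: for a source tensor with dimensions d_0, ..., d_3,

\mathrm{broadcast}(i_0,i_1,i_2,i_3) \;=\; \mathrm{tensor}(i_0 \bmod d_0,\; i_1 \bmod d_1,\; i_2 \bmod d_2,\; i_3 \bmod d_3)

so a size-1 dimension is simply replicated along the broadcast factor.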
void test_cxx11_tensor_broadcasting() void test_cxx11_tensor_broadcasting()
{ {
@ -191,4 +249,8 @@ void test_cxx11_tensor_broadcasting()
CALL_SUBTEST(test_static_broadcasting<RowMajor>()); CALL_SUBTEST(test_static_broadcasting<RowMajor>());
CALL_SUBTEST(test_fixed_size_broadcasting<ColMajor>()); CALL_SUBTEST(test_fixed_size_broadcasting<ColMajor>());
CALL_SUBTEST(test_fixed_size_broadcasting<RowMajor>()); CALL_SUBTEST(test_fixed_size_broadcasting<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_n_by_one<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_n_by_one<ColMajor>());
} }

View File

@ -1318,6 +1318,157 @@ void test_cuda_i1e()
cudaFree(d_out); cudaFree(d_out);
} }
template <typename Scalar>
void test_cuda_igamma_der_a()
{
Tensor<Scalar, 1> in_x(30);
Tensor<Scalar, 1> in_a(30);
Tensor<Scalar, 1> out(30);
Tensor<Scalar, 1> expected_out(30);
out.setZero();
Array<Scalar, 1, Dynamic> in_a_array(30);
Array<Scalar, 1, Dynamic> in_x_array(30);
Array<Scalar, 1, Dynamic> expected_out_array(30);
// See special_functions.cpp for the Python code that generates the test data.
in_a_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0,
1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0,
100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;
in_x_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065,
0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288,
1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458,
7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233,
92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677,
968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568;
expected_out_array << -32.7256441441, -36.4394150514, -9.66467612263,
-36.4394150514, -36.4394150514, -1.0891900302, -2.66351229645,
-2.48666868596, -0.929700494428, -3.56327722764, -0.455320135314,
-0.391437214323, -0.491352055991, -0.350454834292, -0.471773162921,
-0.104084440522, -0.0723646747909, -0.0992828975532, -0.121638215446,
-0.122619605294, -0.0317670267286, -0.0359974812869, -0.0154359225363,
-0.0375775365921, -0.00794899153653, -0.00777303219211, -0.00796085782042,
-0.0125850719397, -0.00455500206958, -0.00476436993148;
for (int i = 0; i < 30; ++i) {
in_x(i) = in_x_array(i);
in_a(i) = in_a_array(i);
expected_out(i) = expected_out_array(i);
}
std::size_t bytes = in_x.size() * sizeof(Scalar);
Scalar* d_a;
Scalar* d_x;
Scalar* d_out;
cudaMalloc((void**)(&d_a), bytes);
cudaMalloc((void**)(&d_x), bytes);
cudaMalloc((void**)(&d_out), bytes);
cudaMemcpy(d_a, in_a.data(), bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_a(d_a, 30);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_x(d_x, 30);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 30);
gpu_out.device(gpu_device) = gpu_a.igamma_der_a(gpu_x);
assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
for (int i = 0; i < 30; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
cudaFree(d_a);
cudaFree(d_x);
cudaFree(d_out);
}
template <typename Scalar>
void test_cuda_gamma_sample_der_alpha()
{
Tensor<Scalar, 1> in_alpha(30);
Tensor<Scalar, 1> in_sample(30);
Tensor<Scalar, 1> out(30);
Tensor<Scalar, 1> expected_out(30);
out.setZero();
Array<Scalar, 1, Dynamic> in_alpha_array(30);
Array<Scalar, 1, Dynamic> in_sample_array(30);
Array<Scalar, 1, Dynamic> expected_out_array(30);
// See special_functions.cpp for the Python code that generates the test data.
in_alpha_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0,
1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0,
100.0, 100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;
in_sample_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065,
0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288,
1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458,
7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233,
92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677,
968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568;
expected_out_array << 7.42424742367e-23, 1.02004297287e-34, 0.0130155240738,
1.02004297287e-34, 1.02004297287e-34, 1.96505168277e-13, 0.525575786243,
0.713903991771, 2.32077561808e-14, 0.000179348049886, 0.635500453302,
1.27561284917, 0.878125852156, 0.41565819538, 1.03606488534,
0.885964824887, 1.16424049334, 1.10764479598, 1.04590810812,
1.04193666963, 0.965193152414, 0.976217589464, 0.93008035061,
0.98153216096, 0.909196397698, 0.98434963993, 0.984738050206,
1.00106492525, 0.97734200649, 1.02198794179;
for (int i = 0; i < 30; ++i) {
in_alpha(i) = in_alpha_array(i);
in_sample(i) = in_sample_array(i);
expected_out(i) = expected_out_array(i);
}
std::size_t bytes = in_alpha.size() * sizeof(Scalar);
Scalar* d_alpha;
Scalar* d_sample;
Scalar* d_out;
cudaMalloc((void**)(&d_alpha), bytes);
cudaMalloc((void**)(&d_sample), bytes);
cudaMalloc((void**)(&d_out), bytes);
cudaMemcpy(d_alpha, in_alpha.data(), bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_sample, in_sample.data(), bytes, cudaMemcpyHostToDevice);
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_alpha(d_alpha, 30);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_sample(d_sample, 30);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 30);
gpu_out.device(gpu_device) = gpu_alpha.gamma_sample_der_alpha(gpu_sample);
assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
for (int i = 0; i < 30; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
cudaFree(d_alpha);
cudaFree(d_sample);
cudaFree(d_out);
}
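The Tensor-level entry points exercised on the GPU above can be sketched on the host as well (sizes and values illustrative; assumes the TensorBase binary ops added in this change):

Eigen::Tensor<float, 1> a(3), x(3), out(3);
a.setValues({1.0f, 2.0f, 3.0f});
x.setValues({0.5f, 1.5f, 2.5f});
out = a.igamma_der_a(x);            // element-wise d/da igamma(a, x)
out = a.gamma_sample_der_alpha(x);  // element-wise reparameterization gradient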
void test_cxx11_tensor_cuda() void test_cxx11_tensor_cuda()
{ {
@ -1396,5 +1547,11 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_6(test_cuda_i1e<float>()); CALL_SUBTEST_6(test_cuda_i1e<float>());
CALL_SUBTEST_6(test_cuda_i1e<double>()); CALL_SUBTEST_6(test_cuda_i1e<double>());
CALL_SUBTEST_6(test_cuda_igamma_der_a<float>());
CALL_SUBTEST_6(test_cuda_igamma_der_a<double>());
CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha<float>());
CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha<double>());
#endif #endif
} }

View File

@ -22,10 +22,10 @@
using Eigen::Tensor; using Eigen::Tensor;
// Inflation Defenition for each dimention the inflated val would be // Inflation Definition for each dimension the inflated val would be
//((dim-1)*strid[dim] +1) //((dim-1)*strid[dim] +1)
// for 1 dimnention vector of size 3 with value (4,4,4) with the inflated stride value of 3 would be changed to // for 1 dimension vector of size 3 with value (4,4,4) with the inflated stride value of 3 would be changed to
// tensor of size (2*3) +1 = 7 with the value of // tensor of size (2*3) +1 = 7 with the value of
// (4, 0, 0, 4, 0, 0, 4). // (4, 0, 0, 4, 0, 0, 4).

View File

@ -247,7 +247,7 @@ void test_cuda_trancendental() {
} }
for (int i = 0; i < num_elem; ++i) { for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl; std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
if(std::abs(input2(i)-1.f)<0.05f) // log lacks accurary nearby 1 if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy nearby 1
VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f)); VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f));
else else
VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));

View File

@ -37,7 +37,7 @@ void test_cuda_random_uniform()
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
// For now we just check thes code doesn't crash. // For now we just check this code doesn't crash.
// TODO: come up with a valid test of randomness // TODO: come up with a valid test of randomness
} }

View File

@ -132,7 +132,7 @@ void test_forward_adolc()
} }
{ {
// simple instanciation tests // simple instantiation tests
Matrix<adtl::adouble,2,1> x; Matrix<adtl::adouble,2,1> x;
foo(x); foo(x);
Matrix<adtl::adouble,Dynamic,Dynamic> A(4,4);; Matrix<adtl::adouble,Dynamic,Dynamic> A(4,4);;

View File

@ -8,7 +8,7 @@
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// import basic and product tests for deprectaed DynamicSparseMatrix // import basic and product tests for deprecated DynamicSparseMatrix
#define EIGEN_NO_DEPRECATED_WARNING #define EIGEN_NO_DEPRECATED_WARNING
#include "sparse_basic.cpp" #include "sparse_basic.cpp"
#include "sparse_product.cpp" #include "sparse_product.cpp"

View File

@ -335,6 +335,7 @@ template<typename ArrayType> void array_special_functions()
ArrayType test = betainc(a, b + one, x) + eps; ArrayType test = betainc(a, b + one, x) + eps;
verify_component_wise(test, expected);); verify_component_wise(test, expected););
} }
#endif // EIGEN_HAS_C99_MATH
// Test Bessel function i0e. Reference results obtained with SciPy. // Test Bessel function i0e. Reference results obtained with SciPy.
{ {
@ -375,7 +376,100 @@ template<typename ArrayType> void array_special_functions()
CALL_SUBTEST(res = i1e(x); CALL_SUBTEST(res = i1e(x);
verify_component_wise(res, expected);); verify_component_wise(res, expected););
} }
#endif
/* Code to generate the data for the following two test cases.
import numpy as np
import mpmath
N = 5
np.random.seed(3)
a = np.logspace(-2, 3, 6)
a = np.ravel(np.tile(np.reshape(a, [-1, 1]), [1, N]))
x = np.random.gamma(a, 1.0)
x = np.maximum(x, np.finfo(np.float32).tiny)
def igamma(a, x):
return mpmath.gammainc(a, 0, x, regularized=True)
def igamma_der_a(a, x):
res = mpmath.diff(lambda a_prime: igamma(a_prime, x), a)
return np.float64(res)
def gamma_sample_der_alpha(a, x):
igamma_x = igamma(a, x)
def igammainv_of_igamma(a_prime):
return mpmath.findroot(lambda x_prime: igamma(a_prime, x_prime) -
igamma_x, x, solver='newton')
return np.float64(mpmath.diff(igammainv_of_igamma, a))
v_igamma_der_a = np.vectorize(igamma_der_a)(a, x)
v_gamma_sample_der_alpha = np.vectorize(gamma_sample_der_alpha)(a, x)
*/
#if EIGEN_HAS_C99_MATH
// Test igamma_der_a
{
ArrayType a(30);
ArrayType x(30);
ArrayType res(30);
ArrayType v(30);
a << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0, 1.0,
1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0,
100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;
x << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16,
0.0132865061065, 0.0200034203853, 6.29263709118e-17, 1.37160367764e-06,
0.333412038288, 1.18135687766, 0.580629033777, 0.170631439426,
0.786686768458, 7.63873279537, 13.1944344379, 11.896042354,
10.5830172417, 10.5020942233, 92.8918587747, 95.003720371,
86.3715926467, 96.0330217672, 82.6389930677, 968.702906754,
969.463546828, 1001.79726022, 955.047416547, 1044.27458568;
v << -32.7256441441, -36.4394150514, -9.66467612263, -36.4394150514,
-36.4394150514, -1.0891900302, -2.66351229645, -2.48666868596,
-0.929700494428, -3.56327722764, -0.455320135314, -0.391437214323,
-0.491352055991, -0.350454834292, -0.471773162921, -0.104084440522,
-0.0723646747909, -0.0992828975532, -0.121638215446, -0.122619605294,
-0.0317670267286, -0.0359974812869, -0.0154359225363, -0.0375775365921,
-0.00794899153653, -0.00777303219211, -0.00796085782042,
-0.0125850719397, -0.00455500206958, -0.00476436993148;
CALL_SUBTEST(res = igamma_der_a(a, x); verify_component_wise(res, v););
}
// Test gamma_sample_der_alpha
{
ArrayType alpha(30);
ArrayType sample(30);
ArrayType res(30);
ArrayType v(30);
alpha << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0,
1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0,
100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;
sample << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16,
0.0132865061065, 0.0200034203853, 6.29263709118e-17, 1.37160367764e-06,
0.333412038288, 1.18135687766, 0.580629033777, 0.170631439426,
0.786686768458, 7.63873279537, 13.1944344379, 11.896042354,
10.5830172417, 10.5020942233, 92.8918587747, 95.003720371,
86.3715926467, 96.0330217672, 82.6389930677, 968.702906754,
969.463546828, 1001.79726022, 955.047416547, 1044.27458568;
v << 7.42424742367e-23, 1.02004297287e-34, 0.0130155240738,
1.02004297287e-34, 1.02004297287e-34, 1.96505168277e-13, 0.525575786243,
0.713903991771, 2.32077561808e-14, 0.000179348049886, 0.635500453302,
1.27561284917, 0.878125852156, 0.41565819538, 1.03606488534,
0.885964824887, 1.16424049334, 1.10764479598, 1.04590810812,
1.04193666963, 0.965193152414, 0.976217589464, 0.93008035061,
0.98153216096, 0.909196397698, 0.98434963993, 0.984738050206,
1.00106492525, 0.97734200649, 1.02198794179;
CALL_SUBTEST(res = gamma_sample_der_alpha(alpha, sample);
verify_component_wise(res, v););
}
#endif // EIGEN_HAS_C99_MATH
} }
void test_special_functions() void test_special_functions()