Mirror of https://gitlab.com/libeigen/eigen.git
Commit d1d22ef0f4: "syncing this fork with upstream"
@@ -157,6 +157,11 @@ template<typename Derived> class DenseBase
        * we are dealing with a column-vector (if there is only one column) or with
        * a row-vector (if there is only one row). */

+      NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2,
+      /**< This value is equal to Tensor::NumDimensions, i.e. 0 for scalars, 1 for vectors,
+        * and 2 for matrices.
+        */
+
       Flags = internal::traits<Derived>::Flags,
       /**< This stores expression \ref flags flags which may or may not be inherited by new expressions
        * constructed from this one. See the \ref flags "list of flags".
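A quick way to see the new constant in action; a minimal sketch, assuming a C++11 compiler for static_assert, and not part of the patch itself:

    #include <Eigen/Dense>
    using namespace Eigen;

    // NumDimensions mirrors Tensor::NumDimensions: 0 for 1x1 (scalar-like)
    // types, 1 for compile-time vectors, 2 for everything else.
    static_assert(Matrix<double,1,1>::NumDimensions == 0, "scalar-like");
    static_assert(Vector3d::NumDimensions == 1, "column vector");
    static_assert(RowVector3d::NumDimensions == 1, "row vector");
    static_assert(MatrixXd::NumDimensions == 2, "matrix");

    int main() { return 0; }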
@@ -85,6 +85,8 @@ struct default_packet_traits
     HasI0e = 0,
     HasI1e = 0,
     HasIGamma = 0,
+    HasIGammaDerA = 0,
+    HasGammaSampleDerAlpha = 0,
     HasIGammac = 0,
     HasBetaInc = 0,
@@ -1293,17 +1293,17 @@ double exp(const double &x) { return ::exp(x); }

 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 std::complex<float> exp(const std::complex<float>& x) {
-  auto com = ::expf(x.real());
-  auto res_real = com * ::cosf(x.imag());
-  auto res_imag = com * ::sinf(x.imag());
+  float com = ::expf(x.real());
+  float res_real = com * ::cosf(x.imag());
+  float res_imag = com * ::sinf(x.imag());
   return std::complex<float>(res_real, res_imag);
 }

 template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 std::complex<double> exp(const std::complex<double>& x) {
-  auto com = ::exp(x.real());
-  auto res_real = com * ::cos(x.imag());
-  auto res_imag = com * ::sin(x.imag());
+  double com = ::exp(x.real());
+  double res_real = com * ::cos(x.imag());
+  double res_imag = com * ::sin(x.imag());
   return std::complex<double>(res_real, res_imag);
 }
 #endif
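The change only pins the intermediate type that `auto` used to deduce; spelled out, the computation is Euler's formula, exp(a + b*i) = e^a * (cos b + i sin b). A standalone sketch with a hypothetical helper name, not Eigen code:

    #include <cmath>
    #include <complex>
    #include <cstdio>

    // exp(a + b*i) = e^a * (cos b + i*sin b), with the intermediate
    // magnitude held in an explicit float rather than auto.
    static std::complex<float> cexp_sketch(const std::complex<float>& x) {
      float com = std::exp(x.real());
      return std::complex<float>(com * std::cos(x.imag()),
                                 com * std::sin(x.imag()));
    }

    int main() {
      std::complex<float> r = cexp_sketch(std::complex<float>(1.0f, 0.5f));
      std::printf("%f + %fi\n", r.real(), r.imag());
      return 0;
    }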
@@ -137,10 +137,14 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
                 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
 {
   typedef Product<Lhs,Rhs,Options> SrcXprType;
+<<<<<<< local
 #if defined(EIGEN_HIPCC)
   EIGEN_DEVICE_FUNC
 #endif
   static EIGEN_STRONG_INLINE
+=======
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+>>>>>>> other
   void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
   {
     Index dstRows = src.rows();
@@ -158,7 +162,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
                 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
 {
   typedef Product<Lhs,Rhs,Options> SrcXprType;
-  static EIGEN_STRONG_INLINE
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
   {
     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -173,7 +177,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
                 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
 {
   typedef Product<Lhs,Rhs,Options> SrcXprType;
-  static EIGEN_STRONG_INLINE
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
   {
     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -193,7 +197,7 @@ struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBi
   typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
                         const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
                         const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
-  static EIGEN_STRONG_INLINE
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
   {
     call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
@@ -220,7 +224,7 @@ template<typename DstXprType, typename OtherXpr, typename ProductType, typename
 struct assignment_from_xpr_op_product
 {
   template<typename SrcXprType, typename InitialFunc>
-  static EIGEN_STRONG_INLINE
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
   {
     call_assignment_no_alias(dst, src.lhs(), Func1());
@@ -249,19 +253,19 @@ template<typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
 {
   template<typename Dst>
-  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
   }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
   }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
 };

@@ -272,7 +276,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>

 // Column major result
 template<typename Dst, typename Lhs, typename Rhs, typename Func>
-void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
+void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
 {
   evaluator<Rhs> rhsEval(rhs);
   typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
@@ -285,7 +289,7 @@ void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const

 // Row major result
 template<typename Dst, typename Lhs, typename Rhs, typename Func>
-void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
+void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
 {
   evaluator<Lhs> lhsEval(lhs);
   typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
@@ -303,37 +307,37 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
   typedef typename Product<Lhs,Rhs>::Scalar Scalar;

   // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
-  struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
-  struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
-  struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
+  struct set { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
+  struct add { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
+  struct sub { template<typename Dst, typename Src> EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
   struct adds {
     Scalar m_scale;
     explicit adds(const Scalar& s) : m_scale(s) {}
-    template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
+    template<typename Dst, typename Src> void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const {
       dst.const_cast_derived() += m_scale * src;
     }
   };

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
   }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
   }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
   }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
     internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
   }
@@ -348,19 +352,19 @@ struct generic_product_impl_base
   typedef typename Product<Lhs,Rhs>::Scalar Scalar;

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }

 };
@@ -376,7 +380,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
   typedef typename internal::remove_all<typename internal::conditional<int(Side)==OnTheRight,LhsNested,RhsNested>::type>::type MatrixType;

   template<typename Dest>
-  static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
   {
     LhsNested actual_lhs(lhs);
     RhsNested actual_rhs(rhs);
@@ -393,10 +397,14 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
   typedef typename Product<Lhs,Rhs>::Scalar Scalar;

   template<typename Dst>
+<<<<<<< local
 #if defined(EIGEN_HIPCC)
   EIGEN_DEVICE_FUNC
 #endif
   static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+=======
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+>>>>>>> other
   {
     // Same as: dst.noalias() = lhs.lazyProduct(rhs);
     // but easier on the compiler side
@@ -404,14 +412,14 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
   }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     // dst.noalias() += lhs.lazyProduct(rhs);
     call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
   }

   template<typename Dst>
-  static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
     // dst.noalias() -= lhs.lazyProduct(rhs);
     call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
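Nearly every hunk above makes the same change: tagging the product-evaluation kernels with EIGEN_DEVICE_FUNC so they are callable from CUDA/HIP device code (the "<<<<<<< local" / ">>>>>>> other" blocks are the fork's HIPCC-guarded variant conflicting with upstream's unconditional annotation). A minimal sketch of the pattern with a stand-in macro, not Eigen's exact definition:

    // MY_DEVICE_FUNC plays the role of EIGEN_DEVICE_FUNC here: it expands to
    // __host__ __device__ under a CUDA/HIP compiler and to nothing otherwise,
    // so a single definition serves both host and device code.
    #if defined(__CUDACC__) || defined(__HIPCC__)
    #define MY_DEVICE_FUNC __host__ __device__
    #else
    #define MY_DEVICE_FUNC
    #endif

    template<typename T>
    MY_DEVICE_FUNC inline T axpy(T a, T x, T y) { return a * x + y; }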
@@ -17,7 +17,6 @@ namespace Eigen {
 template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 {
-  typedef typename Derived::PlainObject PlainObject;
   internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
   return derived();
 }
@@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(co
 template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
 {
-  typedef typename Derived::PlainObject PlainObject;
   internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
   return derived();
 }
@@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(co
 template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
 {
-  typedef typename Derived::PlainObject PlainObject;
   internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
   return derived();
 }
@@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(co
 template<typename Derived>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
-  typedef typename Derived::PlainObject PlainObject;
   internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
   return derived();
 }
@@ -181,7 +181,7 @@ struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<t
   }
 };

-} // end namepsace internal
+} // end namespace internal

 } // end namespace Eigen

@@ -56,7 +56,8 @@ class SolverBase : public EigenBase<Derived>
     MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
                                                            internal::traits<Derived>::MaxColsAtCompileTime>::ret),
     IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
-                         || internal::traits<Derived>::MaxColsAtCompileTime == 1
+                         || internal::traits<Derived>::MaxColsAtCompileTime == 1,
+    NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2
   };

   /** Default constructor */
@@ -318,9 +318,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
 }

 #ifndef EIGEN_VECTORIZE_AVX512
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 #endif

 template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
@@ -604,9 +604,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
   pstore(to, pa);
 }

-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

 template <>
 EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
@@ -47,6 +47,8 @@ template<> struct packet_traits<float> : default_packet_traits
     HasI0e = 1,
     HasI1e = 1,
     HasIGamma = 1,
+    HasIGammaDerA = 1,
+    HasGammaSampleDerAlpha = 1,
     HasIGammac = 1,
     HasBetaInc = 1,

@@ -78,6 +80,8 @@ template<> struct packet_traits<double> : default_packet_traits
     HasI0e = 1,
     HasI1e = 1,
     HasIGamma = 1,
+    HasIGammaDerA = 1,
+    HasGammaSampleDerAlpha = 1,
     HasIGammac = 1,
     HasBetaInc = 1,

@@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
                            _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
 }

-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
 {
@@ -324,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }

-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }

 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
 {
@@ -461,10 +461,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
   pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
 }

+#if EIGEN_COMP_PGI
+typedef const void * SsePrefetchPtrType;
+#else
+typedef const char * SsePrefetchPtrType;
+#endif
+
 #ifndef EIGEN_VECTORIZE_AVX
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const void*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
 #endif

 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
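The point of SsePrefetchPtrType: _mm_prefetch takes a const char* in most toolchains, while PGI declares it with const void*, so the cast target is made compiler-dependent instead of being hard-coded. A standalone sketch with stand-in names:

    #include <xmmintrin.h>

    // Stand-in for Eigen's SsePrefetchPtrType: pick whichever pointer type
    // the local _mm_prefetch prototype expects.
    #if defined(__PGI)
    typedef const void* PrefetchPtr;
    #else
    typedef const char* PrefetchPtr;
    #endif

    inline void prefetch_hint(const float* addr) {
      _mm_prefetch(reinterpret_cast<PrefetchPtr>(addr), _MM_HINT_T0);
    }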
@@ -91,7 +91,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
   // FIXME the transpose variable is only needed to properly split
   // the matrix product when multithreading is enabled. This is a temporary
   // fix to support row-major destination matrices. This whole
-  // parallelizer mechanism has to be redisigned anyway.
+  // parallelizer mechanism has to be redesigned anyway.
   EIGEN_UNUSED_VARIABLE(depth);
   EIGEN_UNUSED_VARIABLE(transpose);
   func(0,rows, 0,cols);
@@ -702,7 +702,7 @@ namespace Eigen {
 // If the user explicitly disable vectorization, then we also disable alignment
 #if defined(EIGEN_DONT_VECTORIZE)
   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
-#elif defined(EIGEN_VECTORIZE_AVX512)
+#elif defined(__AVX512F__)
   // 64 bytes static alignment is preferred only if really required
   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
 #elif defined(__AVX__)
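Switching the test from EIGEN_VECTORIZE_AVX512 to the raw compiler macro __AVX512F__ makes the alignment choice depend only on the compilation target, matching the __AVX__ branch right below it; likely this also avoids ordering issues, since Eigen's own vectorization macros may not be defined yet at this point in the header chain. A condensed stand-in sketch of the resulting cascade:

    // MY_MAX_ALIGN stands in for EIGEN_IDEAL_MAX_ALIGN_BYTES.
    #if defined(__AVX512F__)
      #define MY_MAX_ALIGN 64   // only when the target really has AVX-512
    #elif defined(__AVX__)
      #define MY_MAX_ALIGN 32
    #else
      #define MY_MAX_ALIGN 16
    #endif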
@@ -1033,7 +1033,13 @@ namespace Eigen {
 #  define EIGEN_NOEXCEPT
 #  define EIGEN_NOEXCEPT_IF(x)
 #  define EIGEN_NO_THROW throw()
-#  define EIGEN_EXCEPTION_SPEC(X) throw(X)
+#  if EIGEN_COMP_MSVC
+    // MSVC does not support exception specifications (warning C4290),
+    // and they are deprecated in c++11 anyway.
+#    define EIGEN_EXCEPTION_SPEC(X) throw()
+#  else
+#    define EIGEN_EXCEPTION_SPEC(X) throw(X)
+#  endif
 #endif

 #endif // EIGEN_MACROS_H
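Usage sketch for the macro (hypothetical function name, assuming Eigen/Core is included for the macro and a pre-C++17 dialect where dynamic exception specifications are still legal); on MSVC the specification degrades to throw(), silencing warning C4290:

    #include <Eigen/Core>   // for EIGEN_EXCEPTION_SPEC
    #include <cstddef>

    struct my_bad_alloc {};

    // May throw my_bad_alloc where dynamic exception specifications are
    // honoured; on MSVC the macro collapses to throw().
    void* allocate_buffer(std::size_t n) EIGEN_EXCEPTION_SPEC(my_bad_alloc);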
@@ -587,23 +587,27 @@ T div_ceil(const T &a, const T &b)

 // The aim of the following functions is to bypass -Wfloat-equal warnings
 // when we really want a strict equality comparison on floating points.
-template<typename X, typename Y> EIGEN_STRONG_INLINE
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool equal_strict(const X& x,const Y& y) { return x == y; }

-template<> EIGEN_STRONG_INLINE
+#if !defined(EIGEN_CUDA_ARCH)
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }

-template<> EIGEN_STRONG_INLINE
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
+#endif

-template<typename X, typename Y> EIGEN_STRONG_INLINE
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool not_equal_strict(const X& x,const Y& y) { return x != y; }

-template<> EIGEN_STRONG_INLINE
+#if !defined(EIGEN_CUDA_ARCH)
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }

-template<> EIGEN_STRONG_INLINE
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
 bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
+#endif

 } // end namespace numext
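Why the detour through std::equal_to: -Wfloat-equal fires on a literal x == y between floating-point operands but not on a call into the standard function object, so the comparison stays strict while the warning disappears (the specializations are now compiled out on CUDA devices, where the std:: function objects are unavailable). A minimal standalone sketch:

    #include <cstdio>
    #include <functional>

    // Strict float equality without triggering -Wfloat-equal.
    inline bool strict_eq(float x, float y) { return std::equal_to<float>()(x, y); }

    int main() {
      std::printf("%d\n", strict_eq(1.0f, 1.0f));          // 1
      std::printf("%d\n", strict_eq(1.0f, 1.0f + 1e-6f));  // 0
      return 0;
    }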
@@ -284,13 +284,13 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
   using std::abs;

   m_matT = matrixH;
+  m_workspaceVector.resize(m_matT.cols());
   if(computeU)
-    m_matU = matrixQ;
+    matrixQ.evalTo(m_matU, m_workspaceVector);

   Index maxIters = m_maxIters;
   if (maxIters == -1)
     maxIters = m_maxIterationsPerRow * matrixH.rows();
-  m_workspaceVector.resize(m_matT.cols());
   Scalar* workspace = &m_workspaceVector.coeffRef(0);

   // The matrix m_matT is divided in three parts.
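Two things change here: matrixQ is applied via evalTo with an explicit workspace instead of a plain assignment, and the workspace is resized before that call rather than after, since evalTo writes into it. A sketch of the corrected ordering with stand-in names, assuming a HouseholderSequence-like matrixQ:

    #include <Eigen/Dense>

    // The scratch vector must be sized before it is handed to evalTo().
    void form_q(const Eigen::HessenbergDecomposition<Eigen::MatrixXd>& hess,
                Eigen::MatrixXd& Q, Eigen::VectorXd& workspace, int n) {
      workspace.resize(n);                  // size the workspace first
      hess.matrixQ().evalTo(Q, workspace);  // evalTo uses it as scratch
    }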
@@ -108,7 +108,7 @@ struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, interna
   }
 };

-} // end namepsace internal
+} // end namespace internal

 } // end namespace Eigen

@@ -5,7 +5,7 @@

 /*

-NOTE: thes functions vave been adapted from the LDL library:
+NOTE: these functions have been adapted from the LDL library:

 LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved.

@@ -87,6 +87,11 @@ template<typename Derived> class SparseMatrixBase
        * we are dealing with a column-vector (if there is only one column) or with
        * a row-vector (if there is only one row). */

+      NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 : bool(IsVectorAtCompileTime) ? 1 : 2,
+      /**< This value is equal to Tensor::NumDimensions, i.e. 0 for scalars, 1 for vectors,
+        * and 2 for matrices.
+        */
+
       Flags = internal::traits<Derived>::Flags,
       /**< This stores expression \ref flags flags which may or may not be inherited by new expressions
        * constructed from this one. See the \ref flags "list of flags".
@@ -6,4 +6,4 @@ int main()
 {
   Array4d v(0.5,10,0,-1);
   std::cout << v.lgamma() << std::endl;
-}
+}
@@ -10,7 +10,7 @@ int main()
   MatrixXi m(size,size+1);          // a (size)x(size+1)-matrix of int's
   for (int j=0; j<m.cols(); ++j)    // loop over columns
     for (int i=0; i<m.rows(); ++i)  // loop over rows
-      m(i,j) = i+j*m.rows();        // to access matrix coefficients,
+      m(i,j) = i+j*size;            // to access matrix coefficients,
                                     // use operator()(int,int)
   std::cout << m << "\n\n";
 }
@@ -67,6 +67,7 @@ namespace internal {
     // This method should implement "dst += alpha * lhs * rhs" inplace,
     // however, for iterative solvers, alpha is always equal to 1, so let's not bother about it.
     assert(alpha==Scalar(1) && "scaling is not implemented");
+    EIGEN_ONLY_USED_FOR_DEBUG(alpha);

     // Here we could simply call dst.noalias() += lhs.my_matrix() * rhs,
     // but let's do something fancier (and less efficient):
@@ -1,5 +1,6 @@
 #include <Eigen/Sparse>
+#include <vector>
 #include <iostream>

 typedef Eigen::SparseMatrix<double> SpMat; // declares a column-major sparse matrix type of double
 typedef Eigen::Triplet<double> T;
@@ -9,7 +10,10 @@ void saveAsBitmap(const Eigen::VectorXd& x, int n, const char* filename);

 int main(int argc, char** argv)
 {
-  assert(argc==2);
+  if(argc!=2) {
+    std::cerr << "Error: expected one and only one argument.\n";
+    return -1;
+  }

   int n = 300;  // size of the image
   int m = n*n;  // number of unknowns (=number of pixels)
@@ -47,7 +47,7 @@ set(EIGEN_TEST_MATRIX_DIR "" CACHE STRING "Enable testing of realword sparse mat
 if(EIGEN_TEST_MATRIX_DIR)
   if(NOT WIN32)
     message(STATUS "Test realworld sparse matrices: ${EIGEN_TEST_MATRIX_DIR}")
-    add_definitions( -DTEST_REAL_CASES=${EIGEN_TEST_MATRIX_DIR} )
+    add_definitions( -DTEST_REAL_CASES="${EIGEN_TEST_MATRIX_DIR}" )
   else(NOT WIN32)
     message(STATUS "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32")
   endif(NOT WIN32)
@@ -292,6 +292,7 @@ ei_add_test(mpl2only)
 ei_add_test(inplace_decomposition)
 ei_add_test(half_float)
 ei_add_test(array_of_string)
+ei_add_test(num_dimensions)

 add_executable(bug1213 bug1213.cpp bug1213_main.cpp)
@@ -140,7 +140,7 @@ void check_indexed_view()
                 "500 501 502 503 504 505 506 507 508 509")
   );

-  // takes the row numer 3, and repeat it 5 times
+  // take row number 3, and repeat it 5 times
   VERIFY( MATCH( A(seqN(3,5,0), all),
                 "300 301 302 303 304 305 306 307 308 309\n"
                 "300 301 302 303 304 305 306 307 308 309\n"
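For context, seqN(first, size, incr) builds the index sequence first, first+incr, and so on; an increment of 0 therefore repeats the same index. A usage sketch, assuming the indexed-view API from this development branch:

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXi A = Eigen::MatrixXi::Random(10, 10);
      // seqN(3, 5, 0) yields the indices 3,3,3,3,3: row 3 repeated 5 times.
      std::cout << A(Eigen::seqN(3, 5, 0), Eigen::all) << "\n";
      return 0;
    }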
@@ -397,10 +397,6 @@ void test_indexed_view()
   //   }

   // static checks of some internals:
-
-  #define STATIC_CHECK( COND ) \
-    EIGEN_STATIC_ASSERT( (COND) , EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT )
-
   STATIC_CHECK(( internal::is_valid_index_type<int>::value ));
   STATIC_CHECK(( internal::is_valid_index_type<unsigned int>::value ));
   STATIC_CHECK(( internal::is_valid_index_type<short>::value ));
@@ -345,6 +345,8 @@ inline void verify_impl(bool condition, const char *testname, const char *file,

 #define VERIFY_IS_UNITARY(a) VERIFY(test_isUnitary(a))

+#define STATIC_CHECK(COND) EIGEN_STATIC_ASSERT( (COND) , EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT )
+
 #define CALL_SUBTEST(FUNC) do { \
     g_test_stack.push_back(EI_PP_MAKE_STRING(FUNC)); \
     FUNC; \
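The STATIC_CHECK macro moves from test/indexed_view.cpp (removed above) into test/main.h so that every test, including the new num_dimensions test below, can assert compile-time conditions; it wraps EIGEN_STATIC_ASSERT, which behaves much like a C++11 static_assert. A hypothetical standalone equivalent:

    // Roughly what STATIC_CHECK gives a test, in plain C++11:
    #define MY_STATIC_CHECK(COND) static_assert((COND), "static check failed")

    MY_STATIC_CHECK(sizeof(long) >= sizeof(int));

    int main() { return 0; }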
@@ -365,7 +367,7 @@ template<> inline long double test_precision<std::complex<long double> >() { ret
 inline bool test_isApprox(const short& a, const short& b)
 { return internal::isApprox(a, b, test_precision<short>()); }
 inline bool test_isApprox(const unsigned short& a, const unsigned short& b)
-{ return internal::isApprox(a, b, test_precision<unsigned long>()); }
+{ return internal::isApprox(a, b, test_precision<unsigned short>()); }
 inline bool test_isApprox(const unsigned int& a, const unsigned int& b)
 { return internal::isApprox(a, b, test_precision<unsigned int>()); }
 inline bool test_isApprox(const long& a, const long& b)
@@ -205,7 +205,6 @@ void test_mapped_matrix()
   CALL_SUBTEST_8( map_static_methods(RowVector3d()) );
   CALL_SUBTEST_9( map_static_methods(VectorXcd(8)) );
   CALL_SUBTEST_10( map_static_methods(VectorXf(12)) );
-
   CALL_SUBTEST_11( map_not_aligned_on_scalar<double>() );
   }
 }
test/num_dimensions.cpp (new file, 90 lines)
@@ -0,0 +1,90 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2018 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include <Eigen/SparseCore>
+
+template<int ExpectedDim,typename Xpr>
+void check_dim(const Xpr& ) {
+  STATIC_CHECK( Xpr::NumDimensions == ExpectedDim );
+}
+
+#if EIGEN_HAS_CXX11
+template<template <typename,int,int> class Object>
+void map_num_dimensions()
+{
+  typedef Object<double, 1, 1> ArrayScalarType;
+  typedef Object<double, 2, 1> ArrayVectorType;
+  typedef Object<double, 1, 2> TransposeArrayVectorType;
+  typedef Object<double, 2, 2> ArrayType;
+  typedef Object<double, Eigen::Dynamic, 1> DynamicArrayVectorType;
+  typedef Object<double, 1, Eigen::Dynamic> DynamicTransposeArrayVectorType;
+  typedef Object<double, Eigen::Dynamic, Eigen::Dynamic> DynamicArrayType;
+
+  STATIC_CHECK(ArrayScalarType::NumDimensions == 0);
+  STATIC_CHECK(ArrayVectorType::NumDimensions == 1);
+  STATIC_CHECK(TransposeArrayVectorType::NumDimensions == 1);
+  STATIC_CHECK(ArrayType::NumDimensions == 2);
+  STATIC_CHECK(DynamicArrayVectorType::NumDimensions == 1);
+  STATIC_CHECK(DynamicTransposeArrayVectorType::NumDimensions == 1);
+  STATIC_CHECK(DynamicArrayType::NumDimensions == 2);
+
+  typedef Eigen::Map<ArrayScalarType> ArrayScalarMap;
+  typedef Eigen::Map<ArrayVectorType> ArrayVectorMap;
+  typedef Eigen::Map<TransposeArrayVectorType> TransposeArrayVectorMap;
+  typedef Eigen::Map<ArrayType> ArrayMap;
+  typedef Eigen::Map<DynamicArrayVectorType> DynamicArrayVectorMap;
+  typedef Eigen::Map<DynamicTransposeArrayVectorType> DynamicTransposeArrayVectorMap;
+  typedef Eigen::Map<DynamicArrayType> DynamicArrayMap;
+
+  STATIC_CHECK(ArrayScalarMap::NumDimensions == 0);
+  STATIC_CHECK(ArrayVectorMap::NumDimensions == 1);
+  STATIC_CHECK(TransposeArrayVectorMap::NumDimensions == 1);
+  STATIC_CHECK(ArrayMap::NumDimensions == 2);
+  STATIC_CHECK(DynamicArrayVectorMap::NumDimensions == 1);
+  STATIC_CHECK(DynamicTransposeArrayVectorMap::NumDimensions == 1);
+  STATIC_CHECK(DynamicArrayMap::NumDimensions == 2);
+}
+
+template<typename Scalar, int Rows, int Cols>
+using TArray = Array<Scalar,Rows,Cols>;
+
+template<typename Scalar, int Rows, int Cols>
+using TMatrix = Matrix<Scalar,Rows,Cols>;
+
+#endif
+
+void test_num_dimensions()
+{
+  int n = 10;
+  ArrayXXd A(n,n);
+  CALL_SUBTEST( check_dim<2>(A) );
+  CALL_SUBTEST( check_dim<2>(A.block(1,1,2,2)) );
+  CALL_SUBTEST( check_dim<1>(A.col(1)) );
+  CALL_SUBTEST( check_dim<1>(A.row(1)) );
+
+  MatrixXd M(n,n);
+  CALL_SUBTEST( check_dim<0>(M.row(1)*M.col(1)) );
+
+  SparseMatrix<double> S(n,n);
+  CALL_SUBTEST( check_dim<2>(S) );
+  CALL_SUBTEST( check_dim<2>(S.block(1,1,2,2)) );
+  CALL_SUBTEST( check_dim<1>(S.col(1)) );
+  CALL_SUBTEST( check_dim<1>(S.row(1)) );
+
+  SparseVector<double> s(n);
+  CALL_SUBTEST( check_dim<1>(s) );
+  CALL_SUBTEST( check_dim<1>(s.head(2)) );
+
+
+#if EIGEN_HAS_CXX11
+  CALL_SUBTEST( map_num_dimensions<TArray>() );
+  CALL_SUBTEST( map_num_dimensions<TMatrix>() );
+#endif
+}
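A note on the TArray/TMatrix alias templates in the new test: Array and Matrix carry extra defaulted template parameters, so before C++17 they do not match a template<typename,int,int> class template-template parameter directly; a C++11 alias template with exactly three parameters does. A sketch of the mechanism with hypothetical names:

    #include <Eigen/Dense>

    template<typename Scalar, int Rows, int Cols>
    using ThreeParamMatrix = Eigen::Matrix<Scalar, Rows, Cols>;

    // Accepts class templates with exactly <typename, int, int> parameters.
    template<template <typename, int, int> class Obj>
    int uses_template_template() { Obj<double, 2, 2> m; m.setZero(); return 0; }

    // uses_template_template<Eigen::Matrix>() is ill-formed before C++17
    // (Matrix has further defaulted parameters); the alias works in C++11:
    int dummy = uses_template_template<ThreeParamMatrix>();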
@@ -581,7 +581,7 @@ is not initialized.

 Creates a tensor mapping an existing array of data. The data must not be freed
 until the TensorMap is discarded, and the size of the data must be large enough
-to accomodate of the coefficients of the tensor.
+to accommodate the coefficients of the tensor.

     float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
     Eigen::TensorMap<Tensor<float, 2>> a(data, 3, 4);
@@ -48,7 +48,7 @@ namespace Eigen {
   *
   * <dl>
   * <dt><b>Relation to other parts of Eigen:</b></dt>
-  * <dd>The midterm developement goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that
+  * <dd>The midterm development goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that
   * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code
   * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor
   * class does not provide any of these features and is only available as a stand-alone class that just allows for
@@ -20,7 +20,7 @@ namespace Eigen {
   * \brief The tensor base class.
   *
   * This class is the common parent of the Tensor and TensorMap class, thus
-  * making it possible to use either class interchangably in expressions.
+  * making it possible to use either class interchangeably in expressions.
   */

 template<typename Derived>
@@ -152,6 +152,20 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return binaryExpr(other.derived(), internal::scalar_igamma_op<Scalar>());
     }

+    // igamma_der_a(a = this, x = other)
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorCwiseBinaryOp<internal::scalar_igamma_der_a_op<Scalar>, const Derived, const OtherDerived>
+    igamma_der_a(const OtherDerived& other) const {
+      return binaryExpr(other.derived(), internal::scalar_igamma_der_a_op<Scalar>());
+    }
+
+    // gamma_sample_der_alpha(alpha = this, sample = other)
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorCwiseBinaryOp<internal::scalar_gamma_sample_der_alpha_op<Scalar>, const Derived, const OtherDerived>
+    gamma_sample_der_alpha(const OtherDerived& other) const {
+      return binaryExpr(other.derived(), internal::scalar_gamma_sample_der_alpha_op<Scalar>());
+    }
+
     // igammac(a = this, x = other)
     template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorCwiseBinaryOp<internal::scalar_igammac_op<Scalar>, const Derived, const OtherDerived>
@@ -105,6 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  bool nByOne = false, oneByN = false;

   enum {
     IsAligned = true,
@@ -142,6 +143,24 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
         m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
       }
     }
+
+    if (input_dims[0] == 1) {
+      oneByN = true;
+      for (int i = 1; i < NumDims; ++i) {
+        if (broadcast[i] != 1) {
+          oneByN = false;
+          break;
+        }
+      }
+    } else if (input_dims[NumDims-1] == 1) {
+      nByOne = true;
+      for (int i = 0; i < NumDims-1; ++i) {
+        if (broadcast[i] != 1) {
+          nByOne = false;
+          break;
+        }
+      }
+    }
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
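The constructor now detects two special broadcast shapes up front: an input whose first dimension is 1 and is replicated only along that dimension, and the mirror case for the last dimension; the packet loaders added in the next hunk exploit them. A usage sketch of a shape that takes a fast path, assuming the default column-major tensor layout:

    #include <unsupported/Eigen/CXX11/Tensor>

    void demo() {
      Eigen::Tensor<float, 2> row(1, 4);               // input_dims[0] == 1
      row.setRandom();
      Eigen::array<Eigen::Index, 2> bcast = {{3, 1}};  // replicate dim 0 only
      Eigen::Tensor<float, 2> out = row.broadcast(bcast);  // 3 x 4 result
    }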
@@ -237,9 +256,84 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     }

     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      return packetColMajor<LoadMode>(index);
+      if (oneByN) {
+        return packetNByOne<LoadMode>(index);
+      } else if (nByOne) {
+        return packetOneByN<LoadMode>(index);
+      } else {
+        return packetColMajor<LoadMode>(index);
+      }
     } else {
-      return packetRowMajor<LoadMode>(index);
+      if (oneByN) {
+        return packetOneByN<LoadMode>(index);
+      } else if (nByOne) {
+        return packetNByOne<LoadMode>(index);
+      } else {
+        return packetRowMajor<LoadMode>(index);
+      }
     }
   }

+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
+  {
+    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
+
+    Index dim, inputIndex;
+
+    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      dim = NumDims - 1;
+    } else {
+      dim = 0;
+    }
+
+    inputIndex = index % m_inputStrides[dim];
+    if (inputIndex + PacketSize <= m_inputStrides[dim]) {
+      return m_impl.template packet<Unaligned>(inputIndex);
+    } else {
+      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+      for (int i = 0; i < PacketSize; ++i) {
+        if (inputIndex > m_inputStrides[dim]-1) {
+          inputIndex = 0;
+        }
+        values[i] = m_impl.coeff(inputIndex++);
+      }
+      return internal::pload<PacketReturnType>(values);
+    }
+  }
+
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
+  {
+    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
+
+    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+    Index dim, inputIndex, outputOffset;
+
+    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      dim = 1;
+    } else {
+      dim = NumDims - 2;
+    }
+
+    inputIndex   = index / m_outputStrides[dim];
+    outputOffset = index % m_outputStrides[dim];
+    if (outputOffset + PacketSize <= m_outputStrides[dim]) {
+      values[0] = m_impl.coeff(inputIndex);
+      return internal::pload1<PacketReturnType>(values);
+    } else {
+      for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
+        if (outputOffset + cur < m_outputStrides[dim]) {
+          values[i] = m_impl.coeff(inputIndex);
+        } else {
+          values[i] = m_impl.coeff(++inputIndex);
+          outputOffset = 0;
+          cur = 0;
+        }
+      }
+      return internal::pload<PacketReturnType>(values);
+    }
+  }
+
@@ -290,7 +384,11 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       values[0] = m_impl.coeff(inputIndex);
       for (int i = 1; i < PacketSize; ++i) {
-        values[i] = coeffColMajor(originalIndex+i);
+        if (innermostLoc + i < m_impl.dimensions()[0]) {
+          values[i] = m_impl.coeff(inputIndex+i);
+        } else {
+          values[i] = coeffColMajor(originalIndex+i);
+        }
       }
       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
       return rslt;
@@ -342,7 +440,11 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       values[0] = m_impl.coeff(inputIndex);
       for (int i = 1; i < PacketSize; ++i) {
-        values[i] = coeffRowMajor(originalIndex+i);
+        if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) {
+          values[i] = m_impl.coeff(inputIndex+i);
+        } else {
+          values[i] = coeffRowMajor(originalIndex+i);
+        }
       }
       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
       return rslt;
@@ -78,7 +78,7 @@ class TensorXsmmContractionBlocking {
     outer_n_ = outer_n_ != 0 ? outer_n_ : n;
   }
 #else
-  // Defaults, possibly overriden per-platform.
+  // Defaults, possibly overridden per-platform.
   copyA_ = true;
   copyB_ = false;

@@ -350,7 +350,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
       // Normal number of notifications for k slice switch is
       // nm_ + nn_ + nm_ * nn_. However, first P - 1 slices will receive only
       // nm_ + nn_ notifications, because they will not receive notifications
-      // from preceeding kernels.
+      // from preceding kernels.
       state_switch_[x] =
           x == 0
               ? 1
@@ -530,7 +530,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT

   void kernel(Index m, Index n, Index k) {
     // Note: order of iteration matters here. Iteration over m is innermost
-    // because we want to reuse the same packed rhs in consequetive tasks
+    // because we want to reuse the same packed rhs in consecutive tasks
     // (rhs fits into L2$ while lhs only into L3$).
     const Index nend = n * gn_ + gn(n);
     const Index mend = m * gm_ + gm(m);

@@ -195,7 +195,7 @@ class TensorCostModel {
   // 11 is L2 cache latency on Haswell.
   // We don't know whether data is in L1, L2 or L3. But we are most interested
   // in single-threaded computational time around 100us-10ms (smaller time
-  // is too small for parallelization, larger time is not intersting
+  // is too small for parallelization, larger time is not interesting
   // either because we are probably using all available threads already).
   // And for the target time range, L2 seems to be what matters. Data set
   // fitting into L1 is too small to take noticeable time. Data set fitting
@@ -286,7 +286,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) {
   tileSize =static_cast<Index>(m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>());
   auto s= m_queue.get_device().template get_info<cl::sycl::info::device::vendor>();
   std::transform(s.begin(), s.end(), s.begin(), ::tolower);
-  if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size
+  if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size
     tileSize=std::min(static_cast<Index>(256), static_cast<Index>(tileSize));
   }
   rng = n;
@@ -303,7 +303,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) {
 template<typename Index>
 EIGEN_STRONG_INLINE void parallel_for_setup(Index dim0, Index dim1, Index &tileSize0, Index &tileSize1, Index &rng0, Index &rng1, Index &GRange0, Index &GRange1) const {
   Index max_workgroup_Size = static_cast<Index>(maxSyclThreadsPerBlock());
-  if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size
+  if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size
     max_workgroup_Size=std::min(static_cast<Index>(256), static_cast<Index>(max_workgroup_Size));
   }
   Index pow_of_2 = static_cast<Index>(std::log2(max_workgroup_Size));
@@ -331,7 +331,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) {
 template<typename Index>
 EIGEN_STRONG_INLINE void parallel_for_setup(Index dim0, Index dim1,Index dim2, Index &tileSize0, Index &tileSize1, Index &tileSize2, Index &rng0, Index &rng1, Index &rng2, Index &GRange0, Index &GRange1, Index &GRange2) const {
   Index max_workgroup_Size = static_cast<Index>(maxSyclThreadsPerBlock());
-  if(m_queue.get_device().is_cpu()){ // intel doesnot allow to use max workgroup size
+  if(m_queue.get_device().is_cpu()){ // intel doesn't allow to use max workgroup size
     max_workgroup_Size=std::min(static_cast<Index>(256), static_cast<Index>(max_workgroup_Size));
   }
   Index pow_of_2 = static_cast<Index>(std::log2(max_workgroup_Size));
@@ -377,7 +377,7 @@ m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) {
 EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; }

 EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
-  // OpenCL doesnot have such concept
+  // OpenCL doesn't have such concept
   return 2;
 }

@@ -519,7 +519,7 @@ struct SyclDevice {
   return m_queue_stream->maxSyclThreadsPerBlock();
 }
 EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
-  // OpenCL doesnot have such concept
+  // OpenCL doesn't have such concept
   return m_queue_stream->maxSyclThreadsPerMultiProcessor();
   // return stream_->deviceProperties().maxThreadsPerMultiProcessor;
 }
@@ -544,7 +544,7 @@ struct SyclDevice {
 };
 // This is used as a distingushable device inside the kernel as the sycl device class is not Standard layout.
 // This is internal and must not be used by user. This dummy device allow us to specialise the tensor evaluator
-// inside the kenrel. So we can have two types of eval for host and device. This is required for TensorArgMax operation
+// inside the kernel. So we can have two types of eval for host and device. This is required for TensorArgMax operation
 struct SyclKernelDevice:DefaultDevice{};

 } // end namespace Eigen
@@ -274,7 +274,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
       }
     }

-    // processs the line
+    // process the line
     if (is_power_of_two) {
       processDataLineCooleyTukey(line_buf, line_len, log_len);
     }

@@ -12,7 +12,7 @@

 namespace Eigen {

-// MakePointer class is used as a container of the adress space of the pointer
+// MakePointer class is used as a container of the address space of the pointer
 // on the host and on the device. From the host side it generates the T* pointer
 // and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to
 // T* m_data on the host. It is always called on the device.

@@ -272,8 +272,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
         break;
       default:
         eigen_assert(false && "unexpected padding");
-        m_outputCols=0; // silence the uninitialised warnig;
-        m_outputRows=0; //// silence the uninitialised warnig;
+        m_outputCols=0; // silence the uninitialised warning;
+        m_outputRows=0; //// silence the uninitialised warning;
     }
   }
   eigen_assert(m_outputRows > 0);
@@ -167,7 +167,7 @@ struct TensorIntDivisor {
     shift2 = log_div > 1 ? log_div-1 : 0;
   }

-  // Must have 0 <= numerator. On platforms that dont support the __uint128_t
+  // Must have 0 <= numerator. On platforms that don't support the __uint128_t
   // type numerator should also be less than 2^32-1.
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const {
     eigen_assert(static_cast<typename UnsignedTraits<T>::type>(numerator) < NumTraits<UnsignedType>::highest()/2);

@@ -106,7 +106,7 @@ struct FullReducer<Self, Op, const Eigen::SyclDevice, Vectorizable> {
   /// if the shared memory is less than the GRange, we set shared_mem size to the TotalSize and in this case one kernel would be created for recursion to reduce all to one.
   if (GRange < outTileSize) outTileSize=GRange;
   /// creating the shared memory for calculating reduction.
-  /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can
+  /// This one is used to collect all the reduced value of shared memory as we don't have global barrier on GPU. Once it is saved we can
   /// recursively apply reduction on it in order to reduce the whole.
   auto temp_global_buffer =cl::sycl::buffer<CoeffReturnType, 1>(cl::sycl::range<1>(GRange));
   typedef typename Eigen::internal::remove_all<decltype(self.xprDims())>::type Dims;
@@ -150,7 +150,7 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> {

   // getting final out buffer at the moment the created buffer is true because there is no need for assign
   /// creating the shared memory for calculating reduction.
-  /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can
+  /// This one is used to collect all the reduced value of shared memory as we don't have global barrier on GPU. Once it is saved we can
   /// recursively apply reduction on it in order to reduce the whole.
   dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange);
   dev.sycl_queue().submit([&](cl::sycl::handler &cgh) {
@@ -31,7 +31,7 @@ class TensorLazyBaseEvaluator {
   int refCount() const { return m_refcount; }

  private:
-  // No copy, no assigment;
+  // No copy, no assignment;
   TensorLazyBaseEvaluator(const TensorLazyBaseEvaluator& other);
   TensorLazyBaseEvaluator& operator = (const TensorLazyBaseEvaluator& other);

@@ -117,7 +117,7 @@ SYCLEXTRFUNCTERNARY()



-//TensorCustomOp must be specialised otherewise it will be captured by UnaryCategory while its action is different
+//TensorCustomOp must be specialised otherwise it will be captured by UnaryCategory while its action is different
 //from the UnaryCategory and it is similar to the general FunctorExtractor.
 /// specialisation of TensorCustomOp
 #define SYCLEXTRFUNCCUSTOMUNARYOP(CVQual)\
@@ -80,7 +80,7 @@ template < typename HostExpr, typename FunctorExpr, typename Tuple_of_Acc, typen
   typedef typename ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
   auto device_expr = createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
   /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
-  /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the
+  /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the
   /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
   const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor);
   /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
@@ -121,7 +121,7 @@ class ReductionFunctor<HostExpr, FunctorExpr, Tuple_of_Acc, Dims, Eigen::interna
   typedef typename ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
   auto device_expr = createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
   /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
-  /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the
+  /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the
   /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
   const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor);
   /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
@@ -168,7 +168,7 @@ public:
   typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
   auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
   /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
-  /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the
+  /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the
   /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
   const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, op);
   /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
@@ -215,7 +215,7 @@ public:
   typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
   auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
   /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
-  /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the
+  /// the first behaviour is when it is used as a root to launch the sub-kernel. The second one is when it is treated as a leafnode to pass the
   /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
   const auto device_self_expr= Eigen::TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, op);
   /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
@ -143,7 +143,7 @@ struct IndexList {};
/// \brief Collects internal details for generating index ranges [MIN, MAX)
/// Declare primary template for index range builder
/// \tparam MIN is the starting index in the tuple
/// \tparam N represents sizeof..(elemens)- sizeof...(Is)
/// \tparam N represents sizeof..(elements)- sizeof...(Is)
/// \tparam Is... are the list of generated index so far
template <size_t MIN, size_t N, size_t... Is>
struct RangeBuilder;

@ -161,7 +161,7 @@ struct RangeBuilder<MIN, MIN, Is...> {
/// in this case we are recursively subtracting N by one and adding one
/// index to Is... list until MIN==N
/// \tparam MIN is the starting index in the tuple
/// \tparam N represents sizeof..(elemens)- sizeof...(Is)
/// \tparam N represents sizeof..(elements)- sizeof...(Is)
/// \tparam Is... are the list of generated index so far
template <size_t MIN, size_t N, size_t... Is>
struct RangeBuilder : public RangeBuilder<MIN, N - 1, N - 1, Is...> {};

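The recursion these two templates implement is compact enough to restate; the following self-contained analogue (simplified, hypothetical names, not Eigen's exact definitions) shows how the index pack accumulates:

#include <cstddef>
// Illustrative analogue of the index-range builder described above.
template <std::size_t... Is> struct MyIndexList {};
// Recursive case: prepend N-1 and recurse until N reaches MIN.
template <std::size_t MIN, std::size_t N, std::size_t... Is>
struct MyRangeBuilder : MyRangeBuilder<MIN, N - 1, N - 1, Is...> {};
// Base case MIN == N: the accumulated pack is the range [MIN, MAX).
template <std::size_t MIN, std::size_t... Is>
struct MyRangeBuilder<MIN, MIN, Is...> { typedef MyIndexList<Is...> type; };
// Example: MyRangeBuilder<2, 5>::type is MyIndexList<2, 3, 4>.
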
@ -568,7 +568,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D

Dimensions m_dimensions;

// Parameters passed to the costructor.
// Parameters passed to the constructor.
Index m_plane_strides;
Index m_row_strides;
Index m_col_strides;

@ -241,7 +241,7 @@ struct dimino_first_step_elements
* multiplying all elements in the given subgroup with the new
* coset representative. Note that the first element of the
* subgroup is always the identity element, so the first element of
* ther result of this template is going to be the coset
* the result of this template is going to be the coset
* representative itself.
*
* Note that this template accepts an additional boolean parameter

@ -33,10 +33,10 @@ namespace Eigen {
// ec.Notify(true);
//
// Notify is cheap if there are no waiting threads. Prewait/CommitWait are not
// cheap, but they are executed only if the preceeding predicate check has
// cheap, but they are executed only if the preceding predicate check has
// failed.
//
// Algorihtm outline:
// Algorithm outline:
// There are two main variables: predicate (managed by user) and state_.
// Operation closely resembles Dekker mutual algorithm:
// https://en.wikipedia.org/wiki/Dekker%27s_algorithm

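For context, the waiting-thread side of the protocol this comment outlines goes roughly as follows (an illustrative sketch; predicate, act(), waiters and my_index are placeholders, and the Waiter-pointer signatures are assumed from this version of the header):

EventCount::Waiter& w = waiters[my_index];
if (predicate) return act();   // fast path: no need to block
ec.Prewait(&w);                // publish the intent to wait
if (predicate) {               // re-check after publishing the wait
  ec.CancelWait(&w);           // predicate became true, abort the wait
  return act();
}
ec.CommitWait(&w);             // block until a notifier calls Notify()
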
@ -79,7 +79,7 @@ class EventCount {
uint64_t state = state_.load(std::memory_order_seq_cst);
for (;;) {
if (int64_t((state & kEpochMask) - epoch) < 0) {
// The preceeding waiter has not decided on its fate. Wait until it
// The preceding waiter has not decided on its fate. Wait until it
// calls either CancelWait or CommitWait, or is notified.
EIGEN_THREAD_YIELD();
state = state_.load(std::memory_order_seq_cst);

@ -110,7 +110,7 @@ class EventCount {
uint64_t state = state_.load(std::memory_order_relaxed);
for (;;) {
if (int64_t((state & kEpochMask) - epoch) < 0) {
// The preceeding waiter has not decided on its fate. Wait until it
// The preceding waiter has not decided on its fate. Wait until it
// calls either CancelWait or CommitWait, or is notified.
EIGEN_THREAD_YIELD();
state = state_.load(std::memory_order_relaxed);

@ -198,7 +198,7 @@ class RunQueue {
};
std::mutex mutex_;
// Low log(kSize) + 1 bits in front_ and back_ contain rolling index of
// front/back, repsectively. The remaining bits contain modification counters
// front/back, respectively. The remaining bits contain modification counters
// that are incremented on Push operations. This allows us to (1) distinguish
// between empty and full conditions (if we would use log(kSize) bits for
// position, these conditions would be indistinguishable); (2) obtain

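Unpacking that encoding looks roughly like this (a sketch with hypothetical constants and names, not Eigen's actual members):

const unsigned kSizeBits = 10;                        // log2(kSize), assuming kSize == 1024
const uint64_t kIndexMask = (1ull << (kSizeBits + 1)) - 1;
uint64_t front = front_.load(std::memory_order_relaxed);
uint64_t pos = front & kIndexMask;            // rolling index; the extra bit keeps
                                              // empty (front == back) distinct from
                                              // full (front == back + kSize)
uint64_t version = front >> (kSizeBits + 1);  // modification counter, bumped on Push
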
@ -219,7 +219,7 @@ template<class T, std::size_t N> struct array_size<const array<T,N>& > {

#else

// The compiler supports c++11, and we're not targetting cuda: use std::array as Eigen::array
// The compiler supports c++11, and we're not targeting cuda: use std::array as Eigen::array
#include <array>
namespace Eigen {

@ -35,7 +35,7 @@
* a zero for the system (Powell hybrid "dogleg" method).
*
* This code is a port of minpack (http://en.wikipedia.org/wiki/MINPACK).
* Minpack is a very famous, old, robust and well-reknown package, written in
* Minpack is a very famous, old, robust and well renowned package, written in
* fortran. Those implementations have been carefully tuned, tested, and used
* for several decades.
*

@ -63,7 +63,7 @@
* Other tests were added by myself at the very beginning of the
* process and check the results for levenberg-marquardt using the reference data
* on http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml. Since then i've
* carefully checked that the same results were obtained when modifiying the
* carefully checked that the same results were obtained when modifying the
* code. Please note that we do not always get the exact same decimals as they do,
* but this is ok : they use 128bits float, and we do the tests using the C type 'double',
* which is 64 bits on most platforms (x86 and amd64, at least).

@ -25,7 +25,7 @@ namespace Eigen {
*
* This module provides wrapper functions for a couple of OpenGL functions
* which simplify the way to pass Eigen's object to openGL.
* Here is an exmaple:
* Here is an example:
*
* \code
* // You need to add path_to_eigen/unsupported to your include path.

@ -29,6 +29,8 @@ namespace Eigen {
* - erfc
* - lgamma
* - igamma
* - igamma_der_a
* - gamma_sample_der_alpha
* - igammac
* - digamma
* - polygamma

@ -170,7 +170,7 @@ private:
typedef internal::vector_int_pair<Scalar, Dim> VIPair;
typedef std::vector<VIPair, aligned_allocator<VIPair> > VIPairList;
typedef Matrix<Scalar, Dim, 1> VectorType;
struct VectorComparator //compares vectors, or, more specificall, VIPairs along a particular dimension
struct VectorComparator //compares vectors, or more specifically, VIPairs along a particular dimension
{
VectorComparator(int inDim) : dim(inDim) {}
inline bool operator()(const VIPair &v1, const VIPair &v2) const { return v1.first[dim] < v2.first[dim]; }

@ -300,7 +300,7 @@ public:

/** \brief Reports whether previous computation was successful.
*
* \returns \c Success if computation was succesful, \c NoConvergence otherwise.
* \returns \c Success if computation was successful, \c NoConvergence otherwise.
*/
ComputationInfo info() const
{

@ -12,7 +12,7 @@

namespace Eigen
{
// Forward declerations
// Forward declarations
template <typename _Scalar, class _System>
class EulerAngles;

@ -99,7 +99,7 @@ void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV)
/** \ingroup IterativeSolvers_Module
* Constrained conjugate gradient
*
* Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the contraint \f$ Cx \le f \f$
* Computes the minimum of \f$ 1/2((Ax).x) - bx \f$ under the constraint \f$ Cx \le f \f$
*/
template<typename TMatrix, typename CMatrix,
typename VectorX, typename VectorB, typename VectorF>

@ -39,7 +39,6 @@ template <typename VectorType, typename IndexType>
void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut)
{
eigen_assert(vec.size() == perm.size());
typedef typename IndexType::Scalar Index;
bool flag;
for (Index k = 0; k < ncut; k++)
{

@ -112,7 +111,6 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
using Base::_solve_impl;
typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef typename MatrixType::StorageIndex StorageIndex;
typedef typename MatrixType::RealScalar RealScalar;
typedef _Preconditioner Preconditioner;

@ -146,7 +144,7 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{
bool failed = false;
for(int j=0; j<b.cols(); ++j)
for(Index j=0; j<b.cols(); ++j)
{
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;

@ -170,17 +168,17 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
/**
* Get the restart value
*/
int restart() { return m_restart; }
Index restart() { return m_restart; }

/**
* Set the restart value (default is 30)
*/
void set_restart(const int restart) { m_restart=restart; }
void set_restart(const Index restart) { m_restart=restart; }

/**
* Set the number of eigenvalues to deflate at each restart
*/
void setEigenv(const int neig)
void setEigenv(const Index neig)
{
m_neig = neig;
if (neig+1 > m_maxNeig) m_maxNeig = neig+1; // To allow for complex conjugates

@ -189,12 +187,12 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
/**
* Get the size of the deflation subspace size
*/
int deflSize() {return m_r; }
Index deflSize() {return m_r; }

/**
* Set the maximum size of the deflation subspace
*/
void setMaxEigenv(const int maxNeig) { m_maxNeig = maxNeig; }
void setMaxEigenv(const Index maxNeig) { m_maxNeig = maxNeig; }

protected:
// DGMRES algorithm

@ -202,27 +200,27 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
void dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, const Preconditioner& precond) const;
// Perform one cycle of GMRES
template<typename Dest>
int dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const;
Index dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const;
// Compute data to use for deflation
int dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const;
Index dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const;
// Apply deflation to a vector
template<typename RhsType, typename DestType>
int dgmresApplyDeflation(const RhsType& In, DestType& Out) const;
Index dgmresApplyDeflation(const RhsType& In, DestType& Out) const;
ComplexVector schurValues(const ComplexSchur<DenseMatrix>& schurofH) const;
ComplexVector schurValues(const RealSchur<DenseMatrix>& schurofH) const;
// Init data for deflation
void dgmresInitDeflation(Index& rows) const;
mutable DenseMatrix m_V; // Krylov basis vectors
mutable DenseMatrix m_H; // Hessenberg matrix
mutable DenseMatrix m_Hes; // Initial hessenberg matrix wihout Givens rotations applied
mutable DenseMatrix m_Hes; // Initial hessenberg matrix without Givens rotations applied
mutable Index m_restart; // Maximum size of the Krylov subspace
mutable DenseMatrix m_U; // Vectors that form the basis of the invariant subspace
mutable DenseMatrix m_MU; // matrix operator applied to m_U (for next cycles)
mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */
mutable PartialPivLU<DenseMatrix> m_luT; // LU factorization of m_T
mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart
mutable int m_r; // Current number of deflated eigenvalues, size of m_U
mutable int m_maxNeig; // Maximum number of eigenvalues to deflate
mutable Index m_r; // Current number of deflated eigenvalues, size of m_U
mutable Index m_maxNeig; // Maximum number of eigenvalues to deflate
mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A
mutable bool m_isDeflAllocated;
mutable bool m_isDeflInitialized;

@ -244,13 +242,13 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh
const Preconditioner& precond) const
{
//Initialization
int n = mat.rows();
Index n = mat.rows();
DenseVector r0(n);
int nbIts = 0;
Index nbIts = 0;
m_H.resize(m_restart+1, m_restart);
m_Hes.resize(m_restart, m_restart);
m_V.resize(n,m_restart+1);
//Initial residual vector and intial norm
//Initial residual vector and initial norm
x = precond.solve(x);
r0 = rhs - mat * x;
RealScalar beta = r0.norm();

@ -284,7 +282,7 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh
*/
template< typename _MatrixType, typename _Preconditioner>
template<typename Dest>
int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const
Index DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const
{
//Initialization
DenseVector g(m_restart+1); // Right hand side of the least square problem

@ -293,8 +291,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
m_V.col(0) = r0/beta;
m_info = NoConvergence;
std::vector<JacobiRotation<Scalar> >gr(m_restart); // Givens rotations
int it = 0; // Number of inner iterations
int n = mat.rows();
Index it = 0; // Number of inner iterations
Index n = mat.rows();
DenseVector tv1(n), tv2(n); //Temporary vectors
while (m_info == NoConvergence && it < m_restart && nbIts < m_iterations)
{

@ -312,7 +310,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con

// Orthogonalize it with the previous basis in the basis using modified Gram-Schmidt
Scalar coef;
for (int i = 0; i <= it; ++i)
for (Index i = 0; i <= it; ++i)
{
coef = tv1.dot(m_V.col(i));
tv1 = tv1 - coef * m_V.col(i);

@ -328,7 +326,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
// FIXME Check for happy breakdown

// Update Hessenberg matrix with Givens rotations
for (int i = 1; i <= it; ++i)
for (Index i = 1; i <= it; ++i)
{
m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint());
}

@ -418,7 +416,7 @@ inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_Matr
}

template< typename _MatrixType, typename _Preconditioner>
int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const
Index DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const
{
// First, find the Schur form of the Hessenberg matrix H
typename internal::conditional<NumTraits<Scalar>::IsComplex, ComplexSchur<DenseMatrix>, RealSchur<DenseMatrix> >::type schurofH;

@ -433,8 +431,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri

// Reorder the absolute values of Schur values
DenseRealVector modulEig(it);
for (int j=0; j<it; ++j) modulEig(j) = std::abs(eig(j));
perm.setLinSpaced(it,0,it-1);
for (Index j=0; j<it; ++j) modulEig(j) = std::abs(eig(j));
perm.setLinSpaced(it,0,internal::convert_index<StorageIndex>(it-1));
internal::sortWithPermutation(modulEig, perm, neig);

if (!m_lambdaN)

@ -442,7 +440,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
m_lambdaN = (std::max)(modulEig.maxCoeff(), m_lambdaN);
}
//Count the real number of extracted eigenvalues (with complex conjugates)
int nbrEig = 0;
Index nbrEig = 0;
while (nbrEig < neig)
{
if(eig(perm(it-nbrEig-1)).imag() == RealScalar(0)) nbrEig++;

@ -451,7 +449,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
// Extract the Schur vectors corresponding to the smallest Ritz values
DenseMatrix Sr(it, nbrEig);
Sr.setZero();
for (int j = 0; j < nbrEig; j++)
for (Index j = 0; j < nbrEig; j++)
{
Sr.col(j) = schurofH.matrixU().col(perm(it-j-1));
}

@ -462,8 +460,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
if (m_r)
{
// Orthogonalize X against m_U using modified Gram-Schmidt
for (int j = 0; j < nbrEig; j++)
for (int k =0; k < m_r; k++)
for (Index j = 0; j < nbrEig; j++)
for (Index k =0; k < m_r; k++)
X.col(j) = X.col(j) - (m_U.col(k).dot(X.col(j)))*m_U.col(k);
}

@ -473,7 +471,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
dgmresInitDeflation(m);
DenseMatrix MX(m, nbrEig);
DenseVector tv1(m);
for (int j = 0; j < nbrEig; j++)
for (Index j = 0; j < nbrEig; j++)
{
tv1 = mat * X.col(j);
MX.col(j) = precond.solve(tv1);

@ -488,8 +486,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
}

// Save X into m_U and m_MX in m_MU
for (int j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j);
for (int j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j);
for (Index j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j);
for (Index j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j);
// Increase the size of the invariant subspace
m_r += nbrEig;

@ -502,7 +500,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
}
template<typename _MatrixType, typename _Preconditioner>
template<typename RhsType, typename DestType>
int DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
Index DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
{
DenseVector x1 = m_U.leftCols(m_r).transpose() * x;
y = x + m_U.leftCols(m_r) * ( m_lambdaN * m_luT.solve(x1) - x1);

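These int-to-Index changes leave DGMRES's public usage unchanged; a typical call sequence is still the following sketch (matrix assembly omitted, parameter values illustrative):

#include <unsupported/Eigen/IterativeSolvers>
Eigen::SparseMatrix<double> A;   // assembled elsewhere
Eigen::VectorXd b, x;
Eigen::DGMRES<Eigen::SparseMatrix<double> > solver;
solver.set_restart(30);          // Krylov subspace size per cycle
solver.setEigenv(1);             // eigenvalues to deflate at each restart
solver.compute(A);
x = solver.solve(b);
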
@ -73,7 +73,7 @@ void lmqrsolv(
qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj;
wa[k] = temp;

/* accumulate the tranformation in the row of s. */
/* accumulate the transformation in the row of s. */
for (i = k+1; i<n; ++i) {
temp = givens.c() * s(i,k) + givens.s() * sdiag[i];
sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i];

@ -233,9 +233,9 @@ class LevenbergMarquardt : internal::no_assignment_operator

/**
* \brief Reports whether the minimization was successful
* \returns \c Success if the minimization was succesful,
* \returns \c Success if the minimization was successful,
* \c NumericalIssue if a numerical problem arises during the
* minimization process, for exemple during the QR factorization
* minimization process, for example during the QR factorization
* \c NoConvergence if the minimization did not converge after
* the maximum number of function evaluation allowed
* \c InvalidInput if the input matrix is invalid

@ -313,7 +313,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
matrix_exp_pade17(A, U, V);
}

#elif LDBL_MANT_DIG <= 112 // quadruple precison
#elif LDBL_MANT_DIG <= 112 // quadruple precision

if (l1norm < 1.639394610288918690547467954466970e-005L) {
matrix_exp_pade3(arg, U, V);

@ -81,7 +81,7 @@ class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParen
*
* \note Currently this class is only used by MatrixPower. One may
* insist that this be nested into MatrixPower. This class is here to
* faciliate future development of triangular matrix functions.
* facilitate future development of triangular matrix functions.
*/
template<typename MatrixType>
class MatrixPowerAtomic : internal::noncopyable

@ -61,7 +61,7 @@ void qrsolv(
qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj;
wa[k] = temp;

/* accumulate the tranformation in the row of s. */
/* accumulate the transformation in the row of s. */
for (i = k+1; i<n; ++i) {
temp = givens.c() * s(i,k) + givens.s() * sdiag[i];
sdiag[i] = -givens.s() * s(i,k) + givens.c() * sdiag[i];

@ -22,7 +22,7 @@ void r1updt(
Scalar temp;
JacobiRotation<Scalar> givens;

// r1updt had a broader usecase, but we dont use it here. And, more
// r1updt had a broader usecase, but we don't use it here. And, more
// importantly, we can not test it.
eigen_assert(m==n);
eigen_assert(u.size()==m);

@ -104,7 +104,7 @@ class companion
/** Helper function for the balancing algorithm.
* \returns true if the row and the column, having colNorm and rowNorm
* as norms, are balanced, false otherwise.
* colB and rowB are repectively the multipliers for
* colB and rowB are respectively the multipliers for
* the column and the row in order to balance them.
* */
bool balanced( RealScalar colNorm, RealScalar rowNorm,

@ -113,7 +113,7 @@ class companion
/** Helper function for the balancing algorithm.
* \returns true if the row and the column, having colNorm and rowNorm
* as norms, are balanced, false otherwise.
* colB and rowB are repectively the multipliers for
* colB and rowB are respectively the multipliers for
* the column and the row in order to balance them.
* */
bool balancedR( RealScalar colNorm, RealScalar rowNorm,

@ -41,7 +41,7 @@ public:

/** Sets the relative threshold value used to prune zero coefficients during the decomposition.
*
* Setting a value greater than zero speeds up computation, and yields to an imcomplete
* Setting a value greater than zero speeds up computation, and yields to an incomplete
* factorization with fewer non zero coefficients. Such approximate factors are especially
* useful to initialize an iterative solver.
*

@ -206,26 +206,26 @@ public:
if (col > row) //upper matrix
{
const Index minOuterIndex = inner - m_data.upperProfile(inner);
eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage");
eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner)));
}
if (col < row) //lower matrix
{
const Index minInnerIndex = outer - m_data.lowerProfile(outer);
eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage");
eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer)));
}
} else {
if (outer > inner) //upper matrix
{
const Index maxOuterIndex = inner + m_data.upperProfile(inner);
eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage");
eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + (outer - inner));
}
if (outer < inner) //lower matrix
{
const Index maxInnerIndex = outer + m_data.lowerProfile(outer);
eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage");
eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer));
}
}

@ -300,11 +300,11 @@ public:

if (IsRowMajor) {
const Index minInnerIndex = outer - m_data.lowerProfile(outer);
eigen_assert(inner >= minInnerIndex && "you try to acces a coeff that do not exist in the storage");
eigen_assert(inner >= minInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + inner - (outer - m_data.lowerProfile(outer)));
} else {
const Index maxInnerIndex = outer + m_data.lowerProfile(outer);
eigen_assert(inner <= maxInnerIndex && "you try to acces a coeff that do not exist in the storage");
eigen_assert(inner <= maxInnerIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.lower(m_rowStartIndex[outer] + (inner - outer));
}
}

@ -336,11 +336,11 @@ public:

if (IsRowMajor) {
const Index minOuterIndex = inner - m_data.upperProfile(inner);
eigen_assert(outer >= minOuterIndex && "you try to acces a coeff that do not exist in the storage");
eigen_assert(outer >= minOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + outer - (inner - m_data.upperProfile(inner)));
} else {
const Index maxOuterIndex = inner + m_data.upperProfile(inner);
eigen_assert(outer <= maxOuterIndex && "you try to acces a coeff that do not exist in the storage");
eigen_assert(outer <= maxOuterIndex && "You tried to access a coeff that does not exist in the storage");
return this->m_data.upper(m_colStartIndex[inner] + (outer - inner));
}
}

@ -187,7 +187,7 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
/** Does nothing: provided for compatibility with SparseMatrix */
inline void finalize() {}

/** Suppress all nonzeros which are smaller than \a reference under the tolerence \a epsilon */
/** Suppress all nonzeros which are smaller than \a reference under the tolerance \a epsilon */
void prune(Scalar reference, RealScalar epsilon = NumTraits<RealScalar>::dummy_precision())
{
for (Index j=0; j<outerSize(); ++j)

@ -224,21 +224,21 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
}
}

/** The class DynamicSparseMatrix is deprectaed */
/** The class DynamicSparseMatrix is deprecated */
EIGEN_DEPRECATED inline DynamicSparseMatrix()
: m_innerSize(0), m_data(0)
{
eigen_assert(innerSize()==0 && outerSize()==0);
}

/** The class DynamicSparseMatrix is deprectaed */
/** The class DynamicSparseMatrix is deprecated */
EIGEN_DEPRECATED inline DynamicSparseMatrix(Index rows, Index cols)
: m_innerSize(0)
{
resize(rows, cols);
}

/** The class DynamicSparseMatrix is deprectaed */
/** The class DynamicSparseMatrix is deprecated */
template<typename OtherDerived>
EIGEN_DEPRECATED explicit inline DynamicSparseMatrix(const SparseMatrixBase<OtherDerived>& other)
: m_innerSize(0)

@ -104,7 +104,7 @@ namespace internal
out << value.real() << " " << value.imag() << "\n";
}

} // end namepsace internal
} // end namespace internal

inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscomplex, bool& isvector)
{

@ -33,6 +33,48 @@ igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerive
);
}

/** \cpp11 \returns an expression of the coefficient-wise igamma_der_a(\a a, \a x) to the given arrays.
*
* This function computes the coefficient-wise derivative of the incomplete
* gamma function with respect to the parameter a.
*
* \note This function supports only float and double scalar types in c++11
* mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations
* of igamma_der_a(T,T) for any scalar
* type T to be supported.
*
* \sa Eigen::igamma(), Eigen::lgamma()
*/
template <typename Derived, typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
igamma_der_a(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) {
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
a.derived(),
x.derived());
}

/** \cpp11 \returns an expression of the coefficient-wise gamma_sample_der_alpha(\a alpha, \a sample) to the given arrays.
*
* This function computes the coefficient-wise derivative of the sample
* of a Gamma(alpha, 1) random variable with respect to the parameter alpha.
*
* \note This function supports only float and double scalar types in c++11
* mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations
* of gamma_sample_der_alpha(T,T) for any scalar
* type T to be supported.
*
* \sa Eigen::igamma(), Eigen::lgamma()
*/
template <typename AlphaDerived, typename SampleDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>
gamma_sample_der_alpha(const Eigen::ArrayBase<AlphaDerived>& alpha, const Eigen::ArrayBase<SampleDerived>& sample) {
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>(
alpha.derived(),
sample.derived());
}

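Since both entry points are new in this patch, a short usage sketch may help (C++11 and float/double scalars per the notes above; the values are illustrative):

#include <unsupported/Eigen/SpecialFunctions>
Eigen::ArrayXd a(3), x(3);
a << 0.5, 1.0, 2.0;
x << 0.25, 1.0, 4.0;
Eigen::ArrayXd da = Eigen::igamma_der_a(a, x);            // d/da igamma(a, x)
Eigen::ArrayXd ds = Eigen::gamma_sample_der_alpha(a, x);  // dx/dalpha for x ~ Gamma(a, 1)
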
/** \cpp11 \returns an expression of the coefficient-wise igammac(\a a, \a x) to the given arrays.
*
* This function computes the coefficient-wise complementary incomplete gamma function.

@ -41,6 +41,60 @@ struct functor_traits<scalar_igamma_op<Scalar> > {
};
};

/** \internal
* \brief Template functor to compute the derivative of the incomplete gamma
* function igamma_der_a(a, x)
*
* \sa class CwiseBinaryOp, Cwise::igamma_der_a
*/
template <typename Scalar>
struct scalar_igamma_der_a_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_der_a_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a, const Scalar& x) const {
using numext::igamma_der_a;
return igamma_der_a(a, x);
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const {
return internal::pigamma_der_a(a, x);
}
};
template <typename Scalar>
struct functor_traits<scalar_igamma_der_a_op<Scalar> > {
enum {
// 2x the cost of igamma
Cost = 40 * NumTraits<Scalar>::MulCost + 20 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasIGammaDerA
};
};

/** \internal
* \brief Template functor to compute the derivative of the sample
* of a Gamma(alpha, 1) random variable with respect to the parameter alpha
* gamma_sample_der_alpha(alpha, sample)
*
* \sa class CwiseBinaryOp, Cwise::gamma_sample_der_alpha
*/
template <typename Scalar>
struct scalar_gamma_sample_der_alpha_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_gamma_sample_der_alpha_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& alpha, const Scalar& sample) const {
using numext::gamma_sample_der_alpha;
return gamma_sample_der_alpha(alpha, sample);
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& alpha, const Packet& sample) const {
return internal::pgamma_sample_der_alpha(alpha, sample);
}
};
template <typename Scalar>
struct functor_traits<scalar_gamma_sample_der_alpha_op<Scalar> > {
enum {
// 2x the cost of igamma, minus the lgamma cost (the lgamma cancels out)
Cost = 30 * NumTraits<Scalar>::MulCost + 15 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasGammaSampleDerAlpha
};
};

/** \internal
* \brief Template functor to compute the complementary incomplete gamma function igammac(a, x)

@ -33,6 +33,14 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::h
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
}
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma_der_a(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igamma_der_a(static_cast<float>(a), static_cast<float>(x)));
}
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half gamma_sample_der_alpha(const Eigen::half& alpha, const Eigen::half& sample) {
return Eigen::half(Eigen::numext::gamma_sample_der_alpha(static_cast<float>(alpha), static_cast<float>(sample)));
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x)));
}

@ -521,6 +521,197 @@ struct cephes_helper<double> {
}
};

enum IgammaComputationMode { VALUE, DERIVATIVE, SAMPLE_DERIVATIVE };

template <typename Scalar, IgammaComputationMode mode>
EIGEN_DEVICE_FUNC
int igamma_num_iterations() {
/* Returns the maximum number of internal iterations for igamma computation.
*/
if (mode == VALUE) {
return 2000;
}

if (internal::is_same<Scalar, float>::value) {
return 200;
} else if (internal::is_same<Scalar, double>::value) {
return 500;
} else {
return 2000;
}
}

template <typename Scalar, IgammaComputationMode mode>
struct igammac_cf_impl {
/* Computes igamc(a, x) or derivative (depending on the mode)
* using the continued fraction expansion of the complementary
* incomplete Gamma function.
*
* Preconditions:
* a > 0
* x >= 1
* x >= a
*/
EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) {
const Scalar zero = 0;
const Scalar one = 1;
const Scalar two = 2;
const Scalar machep = cephes_helper<Scalar>::machep();
const Scalar big = cephes_helper<Scalar>::big();
const Scalar biginv = cephes_helper<Scalar>::biginv();

if ((numext::isinf)(x)) {
return zero;
}

// continued fraction
Scalar y = one - a;
Scalar z = x + y + one;
Scalar c = zero;
Scalar pkm2 = one;
Scalar qkm2 = x;
Scalar pkm1 = x + one;
Scalar qkm1 = z * x;
Scalar ans = pkm1 / qkm1;

Scalar dpkm2_da = zero;
Scalar dqkm2_da = zero;
Scalar dpkm1_da = zero;
Scalar dqkm1_da = -x;
Scalar dans_da = (dpkm1_da - ans * dqkm1_da) / qkm1;

for (int i = 0; i < igamma_num_iterations<Scalar, mode>(); i++) {
c += one;
y += one;
z += two;

Scalar yc = y * c;
Scalar pk = pkm1 * z - pkm2 * yc;
Scalar qk = qkm1 * z - qkm2 * yc;

Scalar dpk_da = dpkm1_da * z - pkm1 - dpkm2_da * yc + pkm2 * c;
Scalar dqk_da = dqkm1_da * z - qkm1 - dqkm2_da * yc + qkm2 * c;

if (qk != zero) {
Scalar ans_prev = ans;
ans = pk / qk;

Scalar dans_da_prev = dans_da;
dans_da = (dpk_da - ans * dqk_da) / qk;

if (mode == VALUE) {
if (numext::abs(ans_prev - ans) <= machep * numext::abs(ans)) {
break;
}
} else {
if (numext::abs(dans_da - dans_da_prev) <= machep) {
break;
}
}
}

pkm2 = pkm1;
pkm1 = pk;
qkm2 = qkm1;
qkm1 = qk;

dpkm2_da = dpkm1_da;
dpkm1_da = dpk_da;
dqkm2_da = dqkm1_da;
dqkm1_da = dqk_da;

if (numext::abs(pk) > big) {
pkm2 *= biginv;
pkm1 *= biginv;
qkm2 *= biginv;
qkm1 *= biginv;

dpkm2_da *= biginv;
dpkm1_da *= biginv;
dqkm2_da *= biginv;
dqkm1_da *= biginv;
}
}

/* Compute x**a * exp(-x) / gamma(a) */
Scalar logax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
Scalar dlogax_da = numext::log(x) - digamma_impl<Scalar>::run(a);
Scalar ax = numext::exp(logax);
Scalar dax_da = ax * dlogax_da;

switch (mode) {
case VALUE:
return ans * ax;
case DERIVATIVE:
return ans * dax_da + dans_da * ax;
case SAMPLE_DERIVATIVE:
return -(dans_da + ans * dlogax_da) * x;
}
}
};

template <typename Scalar, IgammaComputationMode mode>
struct igamma_series_impl {
/* Computes igam(a, x) or its derivative (depending on the mode)
* using the series expansion of the incomplete Gamma function.
*
* Preconditions:
* x > 0
* a > 0
* !(x > 1 && x > a)
*/
EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) {
const Scalar zero = 0;
const Scalar one = 1;
const Scalar machep = cephes_helper<Scalar>::machep();

/* power series */
Scalar r = a;
Scalar c = one;
Scalar ans = one;

Scalar dc_da = zero;
Scalar dans_da = zero;

for (int i = 0; i < igamma_num_iterations<Scalar, mode>(); i++) {
r += one;
Scalar term = x / r;
Scalar dterm_da = -x / (r * r);
dc_da = term * dc_da + dterm_da * c;
dans_da += dc_da;
c *= term;
ans += c;

if (mode == VALUE) {
if (c <= machep * ans) {
break;
}
} else {
if (numext::abs(dc_da) <= machep * numext::abs(dans_da)) {
break;
}
}
}

/* Compute x**a * exp(-x) / gamma(a + 1) */
Scalar logax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a + one);
Scalar dlogax_da = numext::log(x) - digamma_impl<Scalar>::run(a + one);
Scalar ax = numext::exp(logax);
Scalar dax_da = ax * dlogax_da;

switch (mode) {
case VALUE:
return ans * ax;
case DERIVATIVE:
return ans * dax_da + dans_da * ax;
case SAMPLE_DERIVATIVE:
return -(dans_da + ans * dlogax_da) * x / a;
}
}
};

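For reference, the series branch above implements the classical left-tail expansion (a standard identity, restated here rather than introduced by the patch): \f$ \mathrm{igam}(a,x) = \frac{x^a e^{-x}}{\Gamma(a+1)} \left(1 + \frac{x}{a+1} + \frac{x^2}{(a+1)(a+2)} + \cdots\right) \f$. The DERIVATIVE and SAMPLE_DERIVATIVE modes propagate \f$ d/da \f$ through each partial product, which is exactly what the dc_da and dans_da recurrences track.
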
#if !EIGEN_HAS_C99_MATH

template <typename Scalar>

@ -535,8 +726,6 @@ struct igammac_impl {

#else

template <typename Scalar> struct igamma_impl; // predeclare igamma_impl

template <typename Scalar>
struct igammac_impl {
EIGEN_DEVICE_FUNC

@ -604,97 +793,15 @@ struct igammac_impl {
return nan;
}

if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans
if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans
return nan;
}

if ((x < one) || (x < a)) {
/* The checks above ensure that we meet the preconditions for
* igamma_impl::Impl(), so call it, rather than igamma_impl::Run().
* Calling Run() would also work, but in that case the compiler may not be
* able to prove that igammac_impl::Run and igamma_impl::Run are not
* mutually recursive. This leads to worse code, particularly on
* platforms like nvptx, where recursion is allowed only begrudgingly.
*/
return (one - igamma_impl<Scalar>::Impl(a, x));
return (one - igamma_series_impl<Scalar, VALUE>::run(a, x));
}

return Impl(a, x);
}

private:
/* igamma_impl calls igammac_impl::Impl. */
friend struct igamma_impl<Scalar>;

/* Actually computes igamc(a, x).
*
* Preconditions:
* a > 0
* x >= 1
* x >= a
*/
EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
const Scalar zero = 0;
const Scalar one = 1;
const Scalar two = 2;
const Scalar machep = cephes_helper<Scalar>::machep();
const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
const Scalar big = cephes_helper<Scalar>::big();
const Scalar biginv = cephes_helper<Scalar>::biginv();
const Scalar inf = NumTraits<Scalar>::infinity();

Scalar ans, ax, c, yc, r, t, y, z;
Scalar pk, pkm1, pkm2, qk, qkm1, qkm2;

if (x == inf) return zero; // std::isinf crashes on CUDA

/* Compute x**a * exp(-x) / gamma(a) */
ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
if (ax < -maxlog) { // underflow
return zero;
}
ax = numext::exp(ax);

// continued fraction
y = one - a;
z = x + y + one;
c = zero;
pkm2 = one;
qkm2 = x;
pkm1 = x + one;
qkm1 = z * x;
ans = pkm1 / qkm1;

for (int i = 0; i < 2000; i++) {
c += one;
y += one;
z += two;
yc = y * c;
pk = pkm1 * z - pkm2 * yc;
qk = qkm1 * z - qkm2 * yc;
if (qk != zero) {
r = pk / qk;
t = numext::abs((ans - r) / r);
ans = r;
} else {
t = one;
}
pkm2 = pkm1;
pkm1 = pk;
qkm2 = qkm1;
qkm1 = qk;
if (numext::abs(pk) > big) {
pkm2 *= biginv;
pkm1 *= biginv;
qkm2 *= biginv;
qkm1 *= biginv;
}
if (t <= machep) {
break;
}
}

return (ans * ax);
return igammac_cf_impl<Scalar, VALUE>::run(a, x);
}
};

@ -704,15 +811,10 @@ struct igammac_impl {
* Implementation of igamma (incomplete gamma integral), based on Cephes but requires C++11/C99 *
************************************************************************************************/

template <typename Scalar>
struct igamma_retval {
typedef Scalar type;
};

#if !EIGEN_HAS_C99_MATH

template <typename Scalar>
struct igamma_impl {
template <typename Scalar, IgammaComputationMode mode>
struct igamma_generic_impl {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),

@ -723,69 +825,17 @@ struct igamma_impl {

#else

template <typename Scalar>
struct igamma_impl {
template <typename Scalar, IgammaComputationMode mode>
struct igamma_generic_impl {
EIGEN_DEVICE_FUNC
static Scalar run(Scalar a, Scalar x) {
/* igam()
* Incomplete gamma integral
*
*
*
* SYNOPSIS:
*
* double a, x, y, igam();
*
* y = igam( a, x );
*
* DESCRIPTION:
*
* The function is defined by
*
* x
* -
* 1 | | -t a-1
* igam(a,x) = ----- | e t dt.
* - | |
* | (a) -
* 0
*
*
* In this implementation both arguments must be positive.
* The integral is evaluated by either a power series or
* continued fraction expansion, depending on the relative
* values of a and x.
*
* ACCURACY (double):
*
* Relative error:
* arithmetic domain # trials peak rms
* IEEE 0,30 200000 3.6e-14 2.9e-15
* IEEE 0,100 300000 9.9e-14 1.5e-14
*
*
* ACCURACY (float):
*
* Relative error:
* arithmetic domain # trials peak rms
* IEEE 0,30 20000 7.8e-6 5.9e-7
*
*/
/*
Cephes Math Library Release 2.2: June, 1992
Copyright 1985, 1987, 1992 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

/* left tail of incomplete gamma function:
*
* inf. k
* a -x - x
* x e > ----------
* - -
* k=0 | (a+k+1)
/* Depending on the mode, returns
* - VALUE: incomplete Gamma function igamma(a, x)
* - DERIVATIVE: derivative of incomplete Gamma function d/da igamma(a, x)
* - SAMPLE_DERIVATIVE: implicit derivative of a Gamma random variable
* x ~ Gamma(x | a, 1), dx/da = -1 / Gamma(x | a, 1) * d igamma(a, x) / dx
*
* Derivatives are implemented by forward-mode differentiation.
*/
const Scalar zero = 0;
const Scalar one = 1;

@ -797,71 +847,167 @@ struct igamma_impl {
return nan;
}

if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans
if ((numext::isnan)(a) || (numext::isnan)(x)) { // propagate nans
return nan;
}

if ((x > one) && (x > a)) {
/* The checks above ensure that we meet the preconditions for
* igammac_impl::Impl(), so call it, rather than igammac_impl::Run().
* Calling Run() would also work, but in that case the compiler may not be
* able to prove that igammac_impl::Run and igamma_impl::Run are not
* mutually recursive. This leads to worse code, particularly on
* platforms like nvptx, where recursion is allowed only begrudgingly.
*/
return (one - igammac_impl<Scalar>::Impl(a, x));
}

return Impl(a, x);
}

private:
/* igammac_impl calls igamma_impl::Impl. */
friend struct igammac_impl<Scalar>;

/* Actually computes igam(a, x).
*
* Preconditions:
* x > 0
* a > 0
* !(x > 1 && x > a)
*/
EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
const Scalar zero = 0;
const Scalar one = 1;
const Scalar machep = cephes_helper<Scalar>::machep();
const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());

Scalar ans, ax, c, r;

/* Compute x**a * exp(-x) / gamma(a) */
ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
if (ax < -maxlog) {
// underflow
return zero;
}
ax = numext::exp(ax);

/* power series */
r = a;
c = one;
ans = one;

for (int i = 0; i < 2000; i++) {
r += one;
c *= x/r;
ans += c;
if (c/ans <= machep) {
break;
Scalar ret = igammac_cf_impl<Scalar, mode>::run(a, x);
if (mode == VALUE) {
return one - ret;
} else {
return -ret;
}
}

return (ans * ax / a);
return igamma_series_impl<Scalar, mode>::run(a, x);
}
};

#endif // EIGEN_HAS_C99_MATH

template <typename Scalar>
struct igamma_retval {
typedef Scalar type;
};

template <typename Scalar>
struct igamma_impl : igamma_generic_impl<Scalar, VALUE> {
/* igam()
* Incomplete gamma integral.
*
* The CDF of Gamma(a, 1) random variable at the point x.
*
* Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample
* 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points.
* The ground truth is computed by mpmath. Mean absolute error:
* float: 1.26713e-05
* double: 2.33606e-12
*
* Cephes documentation below.
*
* SYNOPSIS:
*
* double a, x, y, igam();
*
* y = igam( a, x );
*
* DESCRIPTION:
*
* The function is defined by
*
* x
* -
* 1 | | -t a-1
* igam(a,x) = ----- | e t dt.
* - | |
* | (a) -
* 0
*
*
* In this implementation both arguments must be positive.
* The integral is evaluated by either a power series or
* continued fraction expansion, depending on the relative
* values of a and x.
*
* ACCURACY (double):
*
* Relative error:
* arithmetic domain # trials peak rms
* IEEE 0,30 200000 3.6e-14 2.9e-15
* IEEE 0,100 300000 9.9e-14 1.5e-14
*
*
* ACCURACY (float):
*
* Relative error:
* arithmetic domain # trials peak rms
* IEEE 0,30 20000 7.8e-6 5.9e-7
*
*/
/*
Cephes Math Library Release 2.2: June, 1992
Copyright 1985, 1987, 1992 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

/* left tail of incomplete gamma function:
*
* inf. k
* a -x - x
* x e > ----------
* - -
* k=0 | (a+k+1)
*
*/
};

template <typename Scalar>
struct igamma_der_a_retval : igamma_retval<Scalar> {};

template <typename Scalar>
struct igamma_der_a_impl : igamma_generic_impl<Scalar, DERIVATIVE> {
/* Derivative of the incomplete Gamma function with respect to a.
*
* Computes d/da igamma(a, x) by forward differentiation of the igamma code.
*
* Accuracy estimation. For each a in [10^-2, 10^-1...10^3] we sample
* 50 Gamma random variables x ~ Gamma(x | a, 1), a total of 300 points.
* The ground truth is computed by mpmath. Mean absolute error:
* float: 6.17992e-07
* double: 4.60453e-12
*
* Reference:
* R. Moore. "Algorithm AS 187: Derivatives of the incomplete gamma
* integral". Journal of the Royal Statistical Society. 1982
*/
};

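A quick way to sanity-check the forward-mode derivative against the plain igamma entry point is a central difference (a hypothetical check, not part of the patch; the evaluation point and tolerance are illustrative):

#include <cassert>
#include <cmath>
#include <unsupported/Eigen/SpecialFunctions>
double a = 2.0, x = 1.5, h = 1e-6;
double fd = (Eigen::numext::igamma(a + h, x) -
             Eigen::numext::igamma(a - h, x)) / (2.0 * h);  // central difference
double ad = Eigen::numext::igamma_der_a(a, x);              // forward-mode value
assert(std::abs(fd - ad) < 1e-5);
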
template <typename Scalar>
struct gamma_sample_der_alpha_retval : igamma_retval<Scalar> {};

template <typename Scalar>
struct gamma_sample_der_alpha_impl
: igamma_generic_impl<Scalar, SAMPLE_DERIVATIVE> {
/* Derivative of a Gamma random variable sample with respect to alpha.
*
* Consider a sample of a Gamma random variable with the concentration
* parameter alpha: sample ~ Gamma(alpha, 1). The reparameterization
* derivative that we want to compute is dsample / dalpha =
* d igammainv(alpha, u) / dalpha, where u = igamma(alpha, sample).
* However, this formula is numerically unstable and expensive, so instead
* we use implicit differentiation:
*
* igamma(alpha, sample) = u, where u ~ Uniform(0, 1).
* Apply d / dalpha to both sides:
* d igamma(alpha, sample) / dalpha
* + d igamma(alpha, sample) / dsample * dsample/dalpha = 0
* d igamma(alpha, sample) / dalpha
* + Gamma(sample | alpha, 1) dsample / dalpha = 0
* dsample/dalpha = - (d igamma(alpha, sample) / dalpha)
* / Gamma(sample | alpha, 1)
*
* Here Gamma(sample | alpha, 1) is the PDF of the Gamma distribution
* (note that the derivative of the CDF w.r.t. sample is the PDF).
* See the reference below for more details.
*
* The derivative of igamma(alpha, sample) is computed by forward
* differentiation of the igamma code. Division by the Gamma PDF is performed
* in the same code, increasing the accuracy and speed due to cancellation
* of some terms.
*
* Accuracy estimation. For each alpha in [10^-2, 10^-1...10^3] we sample
* 50 Gamma random variables sample ~ Gamma(sample | alpha, 1), a total of 300
* points. The ground truth is computed by mpmath. Mean absolute error:
* float: 2.1686e-06
* double: 1.4774e-12
*
* Reference:
* M. Figurnov, S. Mohamed, A. Mnih "Implicit Reparameterization Gradients".
* 2018
*/
};

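Condensed into a single formula, the derivation in the comment above states \f$ \frac{d\,\mathrm{sample}}{d\alpha} = -\,\frac{\partial\,\mathrm{igamma}(\alpha,\mathrm{sample})/\partial\alpha}{\Gamma(\mathrm{sample}\mid\alpha,1)} \f$, where \f$ \Gamma(\cdot\mid\alpha,1) \f$ denotes the Gamma(alpha, 1) density; the implementation returns this ratio directly from the SAMPLE_DERIVATIVE mode of the shared igamma kernel.
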
/*****************************************************************************
|
||||
* Implementation of Riemann zeta function of two arguments, based on Cephes *
|
||||
*****************************************************************************/
|
||||
@ -1574,6 +1720,8 @@ struct betainc_impl<double> {
|
||||
}
|
||||
};
|
||||
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
/****************************************************************************
|
||||
* Implementation of Bessel function, based on Cephes *
|
||||
****************************************************************************/
|
||||
@ -1902,8 +2050,6 @@ struct i1e_impl<double> {
|
||||
}
|
||||
};
|
||||
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
namespace numext {
|
||||
@ -1950,6 +2096,18 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
|
||||
return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma_der_a, Scalar)
|
||||
igamma_der_a(const Scalar& a, const Scalar& x) {
|
||||
return EIGEN_MATHFUNC_IMPL(igamma_der_a, Scalar)::run(a, x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(gamma_sample_der_alpha, Scalar)
|
||||
gamma_sample_der_alpha(const Scalar& a, const Scalar& x) {
|
||||
return EIGEN_MATHFUNC_IMPL(gamma_sample_der_alpha, Scalar)::run(a, x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar)
|
||||
igammac(const Scalar& a, const Scalar& x) {
|
||||
|
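For scalar code these wrappers are called directly; a small usage sketch (values illustrative, function names as declared above):

#include <unsupported/Eigen/SpecialFunctions>

void numext_usage_sketch() {
  double a = 2.0, x = 1.5;
  // d igamma(a, x) / da:
  double d_igamma_da = Eigen::numext::igamma_der_a(a, x);
  // derivative w.r.t. a of a Gamma(a, 1) sample that came out equal to x:
  double d_sample_da = Eigen::numext::gamma_sample_der_alpha(a, x);
  (void)d_igamma_da; (void)d_sample_da;
}
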
@ -42,6 +42,21 @@ Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }

/** \internal \returns the derivative of the incomplete gamma function
* with respect to the parameter \a a: igamma_der_a(\a a, \a x) */
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigamma_der_a(const Packet& a, const Packet& x) {
using numext::igamma_der_a; return igamma_der_a(a, x);
}

/** \internal \returns the derivative of the sample of a Gamma(alpha, 1)
* random variable with respect to the parameter alpha:
* gamma_sample_der_alpha(\a alpha, \a sample) */
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pgamma_sample_der_alpha(const Packet& alpha, const Packet& sample) {
using numext::gamma_sample_der_alpha; return gamma_sample_der_alpha(alpha, sample);
}

/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }

@ -120,6 +120,41 @@ double2 pigamma<double2>(const double2& a, const double2& x)
return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
}

template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pigamma_der_a<float4>(
const float4& a, const float4& x) {
using numext::igamma_der_a;
return make_float4(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y),
igamma_der_a(a.z, x.z), igamma_der_a(a.w, x.w));
}

template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
pigamma_der_a<double2>(const double2& a, const double2& x) {
using numext::igamma_der_a;
return make_double2(igamma_der_a(a.x, x.x), igamma_der_a(a.y, x.y));
}

template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pgamma_sample_der_alpha<float4>(
const float4& alpha, const float4& sample) {
using numext::gamma_sample_der_alpha;
return make_float4(
gamma_sample_der_alpha(alpha.x, sample.x),
gamma_sample_der_alpha(alpha.y, sample.y),
gamma_sample_der_alpha(alpha.z, sample.z),
gamma_sample_der_alpha(alpha.w, sample.w));
}

template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
pgamma_sample_der_alpha<double2>(const double2& alpha, const double2& sample) {
using numext::gamma_sample_der_alpha;
return make_double2(
gamma_sample_der_alpha(alpha.x, sample.x),
gamma_sample_der_alpha(alpha.y, sample.y));
}

template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pigammac<float4>(const float4& a, const float4& x)
{

@ -181,7 +181,7 @@ namespace Eigen
* \ingroup Splines_Module
*
* \param[in] pts The data points to which a spline should be fit.
* \param[out] chord_lengths The resulting chord lenggth vector.
* \param[out] chord_lengths The resulting chord length vector.
*
* \sa Les Piegl and Wayne Tiller, The NURBS book (2nd ed.), 1997, 9.2.1 Global Curve Interpolation to Point Data
**/

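A short usage sketch for the function documented above (a guess at typical use, assuming the Splines module's ChordLengths with data points stored one per column):

#include <unsupported/Eigen/Splines>

void chord_lengths_sketch() {
  Eigen::MatrixXd pts(3, 5);        // five 3-D data points, one per column
  pts.setRandom();
  Eigen::RowVectorXd chord_lengths; // resized by the call
  Eigen::ChordLengths(pts, chord_lengths);
  // chord_lengths now holds the cumulative chord-length parameterization,
  // normalized to [0, 1], as used by SplineFitting.
}
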
@ -20,7 +20,7 @@ However, it:
- must rely on Eigen,
- must be highly related to math,
- should have some general purpose in the sense that it could
potentially become an offical Eigen module (or be merged into another one).
potentially become an official Eigen module (or be merged into another one).

If in doubt, feel free to contact us. For instance, if your addon is too specific
but it shows an interesting way of using Eigen, then it could be a nice demo.

@ -70,7 +70,7 @@ void bench_svd(const MatrixType& a = MatrixType())
std::cout<< std::endl;
timerJacobi.reset();
timerBDC.reset();
cout << " Computes rotaion matrix" <<endl;
cout << " Computes rotation matrix" <<endl;
for (int k=1; k<=NUMBER_SAMPLE; ++k)
{
timerBDC.start();

@ -1,5 +1,5 @@
# generate split test header file only if it does not yet exist
# in order to prevent a rebuild everytime cmake is configured
# in order to prevent a rebuild every time cmake is configured
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h "")
foreach(i RANGE 1 999)

@ -81,7 +81,7 @@ void check_limits_specialization()
typedef std::numeric_limits<AD> A;
typedef std::numeric_limits<Scalar> B;

// workaround "unsed typedef" warning:
// workaround "unused typedef" warning:
VERIFY(!bool(internal::is_same<B, A>::value));

#if EIGEN_HAS_CXX11

@ -180,6 +180,64 @@ static void test_fixed_size_broadcasting()
#endif
}

template <int DataLayout>
static void test_simple_broadcasting_one_by_n()
{
Tensor<float, 4, DataLayout> tensor(1,13,5,7);
tensor.setRandom();
array<ptrdiff_t, 4> broadcasts;
broadcasts[0] = 9;
broadcasts[1] = 1;
broadcasts[2] = 1;
broadcasts[3] = 1;
Tensor<float, 4, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);

VERIFY_IS_EQUAL(broadcast.dimension(0), 9);
VERIFY_IS_EQUAL(broadcast.dimension(1), 13);
VERIFY_IS_EQUAL(broadcast.dimension(2), 5);
VERIFY_IS_EQUAL(broadcast.dimension(3), 7);

for (int i = 0; i < 9; ++i) {
for (int j = 0; j < 13; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(tensor(i%1,j%13,k%5,l%7), broadcast(i,j,k,l));
}
}
}
}
}

template <int DataLayout>
static void test_simple_broadcasting_n_by_one()
{
Tensor<float, 4, DataLayout> tensor(7,3,5,1);
tensor.setRandom();
array<ptrdiff_t, 4> broadcasts;
broadcasts[0] = 1;
broadcasts[1] = 1;
broadcasts[2] = 1;
broadcasts[3] = 19;
Tensor<float, 4, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);

VERIFY_IS_EQUAL(broadcast.dimension(0), 7);
VERIFY_IS_EQUAL(broadcast.dimension(1), 3);
VERIFY_IS_EQUAL(broadcast.dimension(2), 5);
VERIFY_IS_EQUAL(broadcast.dimension(3), 19);

for (int i = 0; i < 7; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
for (int l = 0; l < 19; ++l) {
VERIFY_IS_EQUAL(tensor(i%7,j%3,k%5,l%1), broadcast(i,j,k,l));
}
}
}
}
}


void test_cxx11_tensor_broadcasting()
{
@ -191,4 +249,8 @@ void test_cxx11_tensor_broadcasting()
CALL_SUBTEST(test_static_broadcasting<RowMajor>());
CALL_SUBTEST(test_fixed_size_broadcasting<ColMajor>());
CALL_SUBTEST(test_fixed_size_broadcasting<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_n_by_one<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_n_by_one<ColMajor>());
}

@ -1318,6 +1318,157 @@ void test_cuda_i1e()
cudaFree(d_out);
}

template <typename Scalar>
void test_cuda_igamma_der_a()
{
Tensor<Scalar, 1> in_x(30);
Tensor<Scalar, 1> in_a(30);
Tensor<Scalar, 1> out(30);
Tensor<Scalar, 1> expected_out(30);
out.setZero();

Array<Scalar, 1, Dynamic> in_a_array(30);
Array<Scalar, 1, Dynamic> in_x_array(30);
Array<Scalar, 1, Dynamic> expected_out_array(30);

// See special_functions.cpp for the Python code that generates the test data.

in_a_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0,
1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0,
100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;

in_x_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065,
0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288,
1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458,
7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233,
92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677,
968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568;

expected_out_array << -32.7256441441, -36.4394150514, -9.66467612263,
-36.4394150514, -36.4394150514, -1.0891900302, -2.66351229645,
-2.48666868596, -0.929700494428, -3.56327722764, -0.455320135314,
-0.391437214323, -0.491352055991, -0.350454834292, -0.471773162921,
-0.104084440522, -0.0723646747909, -0.0992828975532, -0.121638215446,
-0.122619605294, -0.0317670267286, -0.0359974812869, -0.0154359225363,
-0.0375775365921, -0.00794899153653, -0.00777303219211, -0.00796085782042,
-0.0125850719397, -0.00455500206958, -0.00476436993148;

for (int i = 0; i < 30; ++i) {
in_x(i) = in_x_array(i);
in_a(i) = in_a_array(i);
expected_out(i) = expected_out_array(i);
}

std::size_t bytes = in_x.size() * sizeof(Scalar);

Scalar* d_a;
Scalar* d_x;
Scalar* d_out;
cudaMalloc((void**)(&d_a), bytes);
cudaMalloc((void**)(&d_x), bytes);
cudaMalloc((void**)(&d_out), bytes);

cudaMemcpy(d_a, in_a.data(), bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_x, in_x.data(), bytes, cudaMemcpyHostToDevice);

Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_a(d_a, 30);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_x(d_x, 30);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 30);

gpu_out.device(gpu_device) = gpu_a.igamma_der_a(gpu_x);

assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

for (int i = 0; i < 30; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}

cudaFree(d_a);
cudaFree(d_x);
cudaFree(d_out);
}

template <typename Scalar>
void test_cuda_gamma_sample_der_alpha()
{
Tensor<Scalar, 1> in_alpha(30);
Tensor<Scalar, 1> in_sample(30);
Tensor<Scalar, 1> out(30);
Tensor<Scalar, 1> expected_out(30);
out.setZero();

Array<Scalar, 1, Dynamic> in_alpha_array(30);
Array<Scalar, 1, Dynamic> in_sample_array(30);
Array<Scalar, 1, Dynamic> expected_out_array(30);

// See special_functions.cpp for the Python code that generates the test data.

in_alpha_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0,
1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0,
100.0, 100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;

in_sample_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065,
0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288,
1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458,
7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233,
92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677,
968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568;

expected_out_array << 7.42424742367e-23, 1.02004297287e-34, 0.0130155240738,
1.02004297287e-34, 1.02004297287e-34, 1.96505168277e-13, 0.525575786243,
0.713903991771, 2.32077561808e-14, 0.000179348049886, 0.635500453302,
1.27561284917, 0.878125852156, 0.41565819538, 1.03606488534,
0.885964824887, 1.16424049334, 1.10764479598, 1.04590810812,
1.04193666963, 0.965193152414, 0.976217589464, 0.93008035061,
0.98153216096, 0.909196397698, 0.98434963993, 0.984738050206,
1.00106492525, 0.97734200649, 1.02198794179;

for (int i = 0; i < 30; ++i) {
in_alpha(i) = in_alpha_array(i);
in_sample(i) = in_sample_array(i);
expected_out(i) = expected_out_array(i);
}

std::size_t bytes = in_alpha.size() * sizeof(Scalar);

Scalar* d_alpha;
Scalar* d_sample;
Scalar* d_out;
cudaMalloc((void**)(&d_alpha), bytes);
cudaMalloc((void**)(&d_sample), bytes);
cudaMalloc((void**)(&d_out), bytes);

cudaMemcpy(d_alpha, in_alpha.data(), bytes, cudaMemcpyHostToDevice);
cudaMemcpy(d_sample, in_sample.data(), bytes, cudaMemcpyHostToDevice);

Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_alpha(d_alpha, 30);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_sample(d_sample, 30);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 30);

gpu_out.device(gpu_device) = gpu_alpha.gamma_sample_der_alpha(gpu_sample);

assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

for (int i = 0; i < 30; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}

cudaFree(d_alpha);
cudaFree(d_sample);
cudaFree(d_out);
}

void test_cxx11_tensor_cuda()
{
@ -1396,5 +1547,11 @@ void test_cxx11_tensor_cuda()

CALL_SUBTEST_6(test_cuda_i1e<float>());
CALL_SUBTEST_6(test_cuda_i1e<double>());

CALL_SUBTEST_6(test_cuda_igamma_der_a<float>());
CALL_SUBTEST_6(test_cuda_igamma_der_a<double>());

CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha<float>());
CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha<double>());
#endif
}

@ -22,10 +22,10 @@

using Eigen::Tensor;

// Inflation Defenition for each dimention the inflated val would be
// Inflation Definition: for each dimension, the inflated size would be
// ((dim-1)*stride[dim] + 1)

// for 1 dimnention vector of size 3 with value (4,4,4) with the inflated stride value of 3 would be changed to
// for a 1-dimension vector of size 3 with value (4,4,4), an inflation stride of 3 would change it to a
// tensor of size (2*3) + 1 = 7 with the value
// (4, 0, 0, 4, 0, 0, 4).

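A minimal sketch of the rule above, assuming the unsupported Tensor module's inflate op (sizes follow the comment's example):

#include <unsupported/Eigen/CXX11/Tensor>

void inflation_sketch() {
  Eigen::Tensor<float, 1> a(3);
  a.setValues({4.f, 4.f, 4.f});
  Eigen::array<ptrdiff_t, 1> strides;
  strides[0] = 3;
  // Inflated size: (3 - 1) * 3 + 1 = 7 -> values (4, 0, 0, 4, 0, 0, 4).
  Eigen::Tensor<float, 1> b = a.inflate(strides);
  (void)b;
}
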
@ -247,7 +247,7 @@ void test_cuda_trancendental() {
}
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
if(std::abs(input2(i)-1.f)<0.05f) // log lacks accurary nearby 1
if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy nearby 1
VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f));
else
VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));

@ -37,7 +37,7 @@ void test_cuda_random_uniform()
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

// For now we just check thes code doesn't crash.
// For now we just check this code doesn't crash.
// TODO: come up with a valid test of randomness
}


@ -132,7 +132,7 @@ void test_forward_adolc()
}

{
// simple instanciation tests
// simple instantiation tests
Matrix<adtl::adouble,2,1> x;
foo(x);
Matrix<adtl::adouble,Dynamic,Dynamic> A(4,4);

@ -8,7 +8,7 @@
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


// import basic and product tests for deprectaed DynamicSparseMatrix
// import basic and product tests for deprecated DynamicSparseMatrix
#define EIGEN_NO_DEPRECATED_WARNING
#include "sparse_basic.cpp"
#include "sparse_product.cpp"

@ -335,6 +335,7 @@ template<typename ArrayType> void array_special_functions()
ArrayType test = betainc(a, b + one, x) + eps;
verify_component_wise(test, expected););
}
#endif // EIGEN_HAS_C99_MATH

// Test Bessel function i0e. Reference results obtained with SciPy.
{
@ -375,7 +376,100 @@ template<typename ArrayType> void array_special_functions()
CALL_SUBTEST(res = i1e(x);
verify_component_wise(res, expected););
}
#endif

/* Code to generate the data for the following two test cases.
import mpmath
import numpy as np

N = 5
np.random.seed(3)

a = np.logspace(-2, 3, 6)
a = np.ravel(np.tile(np.reshape(a, [-1, 1]), [1, N]))
x = np.random.gamma(a, 1.0)
x = np.maximum(x, np.finfo(np.float32).tiny)

def igamma(a, x):
  return mpmath.gammainc(a, 0, x, regularized=True)

def igamma_der_a(a, x):
  res = mpmath.diff(lambda a_prime: igamma(a_prime, x), a)
  return np.float64(res)

def gamma_sample_der_alpha(a, x):
  igamma_x = igamma(a, x)
  def igammainv_of_igamma(a_prime):
    return mpmath.findroot(lambda x_prime: igamma(a_prime, x_prime) -
                           igamma_x, x, solver='newton')
  return np.float64(mpmath.diff(igammainv_of_igamma, a))

v_igamma_der_a = np.vectorize(igamma_der_a)(a, x)
v_gamma_sample_der_alpha = np.vectorize(gamma_sample_der_alpha)(a, x)
*/

#if EIGEN_HAS_C99_MATH
// Test igamma_der_a
{
ArrayType a(30);
ArrayType x(30);
ArrayType res(30);
ArrayType v(30);

a << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0, 1.0,
1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0,
100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;

x << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16,
0.0132865061065, 0.0200034203853, 6.29263709118e-17, 1.37160367764e-06,
0.333412038288, 1.18135687766, 0.580629033777, 0.170631439426,
0.786686768458, 7.63873279537, 13.1944344379, 11.896042354,
10.5830172417, 10.5020942233, 92.8918587747, 95.003720371,
86.3715926467, 96.0330217672, 82.6389930677, 968.702906754,
969.463546828, 1001.79726022, 955.047416547, 1044.27458568;

v << -32.7256441441, -36.4394150514, -9.66467612263, -36.4394150514,
-36.4394150514, -1.0891900302, -2.66351229645, -2.48666868596,
-0.929700494428, -3.56327722764, -0.455320135314, -0.391437214323,
-0.491352055991, -0.350454834292, -0.471773162921, -0.104084440522,
-0.0723646747909, -0.0992828975532, -0.121638215446, -0.122619605294,
-0.0317670267286, -0.0359974812869, -0.0154359225363, -0.0375775365921,
-0.00794899153653, -0.00777303219211, -0.00796085782042,
-0.0125850719397, -0.00455500206958, -0.00476436993148;

CALL_SUBTEST(res = igamma_der_a(a, x); verify_component_wise(res, v););
}

// Test gamma_sample_der_alpha
{
ArrayType alpha(30);
ArrayType sample(30);
ArrayType res(30);
ArrayType v(30);

alpha << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0,
1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0,
100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;

sample << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16,
0.0132865061065, 0.0200034203853, 6.29263709118e-17, 1.37160367764e-06,
0.333412038288, 1.18135687766, 0.580629033777, 0.170631439426,
0.786686768458, 7.63873279537, 13.1944344379, 11.896042354,
10.5830172417, 10.5020942233, 92.8918587747, 95.003720371,
86.3715926467, 96.0330217672, 82.6389930677, 968.702906754,
969.463546828, 1001.79726022, 955.047416547, 1044.27458568;

v << 7.42424742367e-23, 1.02004297287e-34, 0.0130155240738,
1.02004297287e-34, 1.02004297287e-34, 1.96505168277e-13, 0.525575786243,
0.713903991771, 2.32077561808e-14, 0.000179348049886, 0.635500453302,
1.27561284917, 0.878125852156, 0.41565819538, 1.03606488534,
0.885964824887, 1.16424049334, 1.10764479598, 1.04590810812,
1.04193666963, 0.965193152414, 0.976217589464, 0.93008035061,
0.98153216096, 0.909196397698, 0.98434963993, 0.984738050206,
1.00106492525, 0.97734200649, 1.02198794179;

CALL_SUBTEST(res = gamma_sample_der_alpha(alpha, sample);
verify_component_wise(res, v););
}
#endif // EIGEN_HAS_C99_MATH
}

void test_special_functions()
