Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-13 04:09:10 +08:00)

commit 9115896590
Merged eigen/eigen into default
@@ -83,22 +83,10 @@ template<typename Derived> class ArrayBase
 #endif // not EIGEN_PARSED_BY_DOXYGEN

 #ifndef EIGEN_PARSED_BY_DOXYGEN
-    /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
-      * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
-      * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
-      * PlainObject or const PlainObject&.
-      */
-    typedef Array<typename internal::traits<Derived>::Scalar,
-                internal::traits<Derived>::RowsAtCompileTime,
-                internal::traits<Derived>::ColsAtCompileTime,
-                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
-                internal::traits<Derived>::MaxRowsAtCompileTime,
-                internal::traits<Derived>::MaxColsAtCompileTime
-    > PlainObject;
+    typedef typename Base::PlainObject PlainObject;

     /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
 #endif // not EIGEN_PARSED_BY_DOXYGEN

 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
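The ArrayBase hunk above replaces the locally spelled-out plain type with DenseBase's PlainObject and retargets ConstantReturnType at that plain type. User-facing behaviour is unchanged; the following sketch uses only the standard public API (values and sizes are illustrative):

    #include <Eigen/Core>
    using namespace Eigen;

    // Constant()/Zero() still return lazy CwiseNullaryOp expressions that mix freely
    // with other array expressions and are evaluated on assignment.
    ArrayXXf a = ArrayXXf::Constant(3, 4, 2.5f);
    ArrayXXf b = a + ArrayXXf::Zero(3, 4);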
@@ -756,6 +756,26 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
   call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
 }

+template<typename Dst, typename Src, typename Func>
+EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
+{
+  Index dstRows = src.rows();
+  Index dstCols = src.cols();
+  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+    dst.resize(dstRows, dstCols);
+
+  // TODO check whether this is the right place to perform these checks:
+  EIGEN_STATIC_ASSERT_LVALUE(Dst)
+  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
+
+  Assignment<Dst,Src,Func>::run(dst, src, func);
+}
+template<typename Dst, typename Src>
+EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
+{
+  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
+}
+
 // forward declaration
 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);

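The new call_assignment_no_alias_no_transpose helper resizes the destination to the source's dimensions, checks the static lvalue/size constraints, and then invokes the Assignment kernel directly. At the public API level this is the usual resizing assignment; a minimal sketch (sizes are illustrative):

    #include <Eigen/Core>

    Eigen::MatrixXd dst;                                // 0x0 at this point
    Eigen::MatrixXd src = Eigen::MatrixXd::Ones(3, 4);
    dst = 2.0 * src;  // dst is resized to 3x4 before the expression is evaluated into it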
@@ -783,7 +803,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
   EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
   {
     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
-
     src.evalTo(dst);
   }
 };
Eigen/src/Core/Assign_MKL.h (256 changes, Normal file → Executable file)
@@ -1,5 +1,6 @@
 /*
  Copyright (c) 2011, Intel Corporation. All rights reserved.
+ Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>

  Redistribution and use in source and binary forms, with or without modification,
  are permitted provided that the following conditions are met:
@@ -37,17 +38,13 @@ namespace Eigen {

 namespace internal {

-template<typename Op> struct vml_call
-{ enum { IsSupported = 0 }; };
-
-template<typename Dst, typename Src, typename UnaryOp>
+template<typename Dst, typename Src>
 class vml_assign_traits
 {
   private:
     enum {
       DstHasDirectAccess = Dst::Flags & DirectAccessBit,
       SrcHasDirectAccess = Src::Flags & DirectAccessBit,
-
       StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
       InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
                 : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
@@ -57,173 +54,118 @@ class vml_assign_traits
                 : int(Dst::MaxRowsAtCompileTime),
       MaxSizeAtCompileTime = Dst::SizeAtCompileTime,

-      MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
-                       && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
+      MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
       MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
       VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
-      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
-      MayEnableVml = MightEnableVml && LargeEnough,
-      MayLinearize = MayEnableVml && MightLinearize
+      LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
     };
   public:
     enum {
-      Traversal = MayLinearize ? LinearVectorizedTraversal
-                : MayEnableVml ? InnerVectorizedTraversal
-                : DefaultTraversal
+      EnableVml = MightEnableVml && LargeEnough,
+      Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
     };
 };

-template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
-         int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
-struct vml_assign_impl
-  : assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
-{
-};
+#define EIGEN_PP_EXPAND(ARG) ARG

-template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
-struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
-{
-  typedef typename Derived1::Scalar Scalar;
-  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
-  {
-    // in case we want to (or have to) skip VML at runtime we can call:
-    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
-    const Index innerSize = dst.innerSize();
-    const Index outerSize = dst.outerSize();
-    for(Index outer = 0; outer < outerSize; ++outer) {
-      const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
-                                               &(src.nestedExpression().coeffRef(0, outer));
-      Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
-      vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
-    }
-  }
-};
-
-template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
-struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
-{
-  static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
-  {
-    // in case we want to (or have to) skip VML at runtime we can call:
-    // assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
-    vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
-  }
-};
-
-// Macroses
-
-#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
-  template<typename Derived1, typename Derived2, typename UnaryOp> \
-  struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
-    static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
-      vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
-    } \
-  };
-
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)

 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
-#define EIGEN_MKL_VML_MODE VML_HA
+#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
 #else
-#define EIGEN_MKL_VML_MODE VML_LA
+#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
 #endif

-#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
-  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
-    enum { IsSupported = 1 }; \
-    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
-                            int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
-      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
-    } \
-  };
+#define EIGEN_VMLMODE_EXPAND__
+
+#define EIGEN_VMLMODE_PREFIX_LA vm
+#define EIGEN_VMLMODE_PREFIX__  v
+#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
+  template< typename DstXprType, typename SrcXprNested> \
+  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
+                    Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
+    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
+      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
+      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
+        VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
+              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
+      } else { \
+        const Index outerSize = dst.outerSize(); \
+        for(Index outer = 0; outer < outerSize; ++outer) { \
+          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
+                                                      &(src.nestedExpression().coeffRef(0, outer)); \
+          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
+          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
+                 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
+        } \
+      } \
+    } \
+  }; \
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
+
+#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
+  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
+
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin,   Sin,   LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin,  Asin,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh,  Sinh,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos,   Cos,   LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos,  Acos,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh,  Cosh,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan,   Tan,   LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan,  Atan,  LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh,  Tanh,  LA)
+// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp,   Exp,   LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log,   Ln,    LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt,  Sqrt,  _)
+
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr,   _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg,    Arg,   _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round,  Round, _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor,  Floor, _)
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil,   Ceil,  _)
+
+#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
+  template< typename DstXprType, typename SrcXprNested> \
+  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
+                    Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
+    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
+    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
+      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
+      VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.functor().m_exponent); \
+      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
+      { \
+        VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \
+               (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
+      } else { \
+        const Index outerSize = dst.outerSize(); \
+        for(Index outer = 0; outer < outerSize; ++outer) { \
+          const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
+                                                      &(src.nestedExpression().coeffRef(0, outer)); \
+          EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
+          VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
+                 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
+        } \
+      } \
     } \
   };

-#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
-  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
-    enum { IsSupported = 1 }; \
-    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
-                            int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
-      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
-      VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
-    } \
-  };
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float,    float,         LA)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double,   double,        LA)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8,  LA)
+EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)

-#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
-  template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
-    enum { IsSupported = 1 }; \
-    static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
-                            int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
-      EIGENTYPE exponent = func.m_exponent; \
-      MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
-      VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
-            (VMLTYPE*)dst, &vmlMode); \
-    } \
-  };
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)
-
-#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
-  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)
-
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sinh, Sinh)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cosh, Cosh)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tanh, Tanh)
-//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log10, Log10)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt)
-
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(arg, Arg)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor)
-EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil)
-
-// The vm*powx functions are not avaibale in the windows version of MKL.
-#ifndef _WIN32
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
-EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
-#endif

 } // end namespace internal
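With the rewritten macros, each supported coefficient-wise function gets an Assignment specialization that forwards a whole eligible expression to the matching VML batch routine (vsSin, vdExp, vmsPowx, and so on). The sketch below shows the kind of user code that can take this path; it assumes Eigen is built with MKL's VML support enabled and that the arrays are large enough to pass the size threshold:

    #include <Eigen/Core>
    using namespace Eigen;

    ArrayXf x = ArrayXf::Random(1 << 20);
    ArrayXf y(x.size());
    y = x.sin();              // candidate for a single vsSin call over the whole array
    ArrayXf z = x.pow(3.5f);  // pow with a scalar exponent maps to the Powx-style variants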
@@ -113,10 +113,10 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
   */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
 {
-  return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
+  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
 }

 /** \returns an expression of a matrix defined by a custom functor \a func
@@ -139,12 +139,12 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
   */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, Derived>(1, size, func);
-  else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
+  if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
+  else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
 }

 /** \returns an expression of a matrix defined by a custom functor \a func
@@ -158,10 +158,10 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
   */
 template<typename Derived>
 template<typename CustomNullaryOp>
-EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
+EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
 DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
 {
-  return CwiseNullaryOp<CustomNullaryOp, Derived>(RowsAtCompileTime, ColsAtCompileTime, func);
+  return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
 }

 /** \returns an expression of a constant matrix of value \a value
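NullaryExpr now types its CwiseNullaryOp on the expression's PlainObject instead of the derived expression; call sites do not change. A small usage sketch with a made-up functor (IndexSum is hypothetical, not part of Eigen):

    #include <Eigen/Core>
    using namespace Eigen;

    struct IndexSum {
      double operator()(Index row, Index col) const { return double(row + col); }
    };

    MatrixXd m = MatrixXd::NullaryExpr(4, 5, IndexSum());  // coefficient (i,j) equals i + j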
@@ -49,6 +49,8 @@ template<typename Derived> class DenseBase
   public:
     using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
                   typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
+    using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
+                  typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator/;


     /** Inner iterator type to iterate over the coefficients of a row or column.
@@ -179,6 +181,35 @@ template<typename Derived> class DenseBase

     enum { IsPlainObjectBase = 0 };

+    /** The plain matrix type corresponding to this expression.
+      * \sa PlainObject */
+    typedef Matrix<typename internal::traits<Derived>::Scalar,
+                internal::traits<Derived>::RowsAtCompileTime,
+                internal::traits<Derived>::ColsAtCompileTime,
+                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                internal::traits<Derived>::MaxRowsAtCompileTime,
+                internal::traits<Derived>::MaxColsAtCompileTime
+          > PlainMatrix;
+
+    /** The plain array type corresponding to this expression.
+      * \sa PlainObject */
+    typedef Array<typename internal::traits<Derived>::Scalar,
+                internal::traits<Derived>::RowsAtCompileTime,
+                internal::traits<Derived>::ColsAtCompileTime,
+                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                internal::traits<Derived>::MaxRowsAtCompileTime,
+                internal::traits<Derived>::MaxColsAtCompileTime
+          > PlainArray;
+
+    /** \brief The plain matrix or array type corresponding to this expression.
+      *
+      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
+      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
+      * that the return type of eval() is either PlainObject or const PlainObject&.
+      */
+    typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
+                                           PlainMatrix, PlainArray>::type PlainObject;
+
     /** \returns the number of nonzero coefficients which is in practice the number
       * of stored coefficients. */
     EIGEN_DEVICE_FUNC
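With PlainMatrix, PlainArray and PlainObject defined in DenseBase, generic code can name the concrete type an arbitrary dense expression evaluates to, whether it is matrix-like or array-like. A common pattern, sketched under that assumption:

    #include <Eigen/Core>

    // Returns an evaluated copy of any dense expression (matrix or array alike).
    template<typename Derived>
    typename Eigen::DenseBase<Derived>::PlainObject
    evaluated(const Eigen::DenseBase<Derived>& expr)
    {
      return expr.derived();  // constructing the PlainObject forces evaluation
    }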
@@ -237,13 +268,12 @@ template<typename Derived> class DenseBase
     }

 #ifndef EIGEN_PARSED_BY_DOXYGEN
-
     /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
     /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
-    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,PlainObject> SequentialLinSpacedReturnType;
     /** \internal Represents a vector with linearly spaced coefficients that allows random access. */
-    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
     /** \internal the return type of MatrixBase::eigenvalues() */
     typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;

@@ -322,13 +352,13 @@ template<typename Derived> class DenseBase
               LinSpaced(const Scalar& low, const Scalar& high);

     template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
                  NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
     template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
                  NullaryExpr(Index size, const CustomNullaryOp& func);
     template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
                  NullaryExpr(const CustomNullaryOp& func);

     EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
@@ -466,9 +496,10 @@ template<typename Derived> class DenseBase
     ConstColwiseReturnType colwise() const;
     ColwiseReturnType colwise();

-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
+    typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
+    static const RandomReturnType Random(Index rows, Index cols);
+    static const RandomReturnType Random(Index size);
+    static const RandomReturnType Random();

     template<typename ThenDerived,typename ElseDerived>
     const Select<Derived,ThenDerived,ElseDerived>
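RandomReturnType only renames the nullary expression returned by the three Random() overloads; usage is unchanged (sizes are illustrative):

    #include <Eigen/Core>
    using namespace Eigen;

    MatrixXd A = MatrixXd::Random(3, 3);  // dynamic size: Random(rows, cols)
    VectorXd v = VectorXd::Random(10);    // vectors: Random(size)
    Matrix2f B = Matrix2f::Random();      // fixed size: Random()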
@@ -349,6 +349,7 @@ struct hypot_retval
 template<typename OldType, typename NewType>
 struct cast_impl
 {
+  EIGEN_DEVICE_FUNC
   static inline NewType run(const OldType& x)
   {
     return static_cast<NewType>(x);
@@ -360,6 +361,7 @@ struct cast_impl
 template<typename OldType, typename NewType>
 inline NewType cast(const OldType& x)
 {
+  EIGEN_DEVICE_FUNC
   return cast_impl<OldType, NewType>::run(x);
 }

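cast_impl is the per-coefficient conversion used by DenseBase::cast<NewType>(); marking it EIGEN_DEVICE_FUNC lets the same path compile in device code. Typical host-side usage:

    #include <Eigen/Core>

    Eigen::VectorXd xd = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);
    Eigen::VectorXf xf = xd.cast<float>();  // each coefficient goes through internal::cast_impl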
@@ -81,6 +81,7 @@ template<typename Derived> class MatrixBase
     using Base::operator*=;
     using Base::operator/=;
     using Base::operator*;
+    using Base::operator/;

     typedef typename Base::CoeffReturnType CoeffReturnType;
     typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
@@ -101,23 +102,11 @@ template<typename Derived> class MatrixBase
     EIGEN_DEVICE_FUNC
     inline Index diagonalSize() const { return (std::min)(rows(),cols()); }

-    /** \brief The plain matrix type corresponding to this expression.
-      *
-      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
-      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
-      * that the return type of eval() is either PlainObject or const PlainObject&.
-      */
-    typedef Matrix<typename internal::traits<Derived>::Scalar,
-                internal::traits<Derived>::RowsAtCompileTime,
-                internal::traits<Derived>::ColsAtCompileTime,
-                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
-                internal::traits<Derived>::MaxRowsAtCompileTime,
-                internal::traits<Derived>::MaxColsAtCompileTime
-    > PlainObject;
+    typedef typename Base::PlainObject PlainObject;

 #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
     /** \internal the return type of MatrixBase::adjoint() */
     typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
                       CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
@@ -126,7 +115,7 @@ template<typename Derived> class MatrixBase
     /** \internal Return type of eigenvalues() */
     typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
     /** \internal the return type of identity */
-    typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
+    typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
     /** \internal the return type of unit vectors */
     typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
                   internal::traits<Derived>::RowsAtCompileTime,
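IdentityReturnType, like the other nullary return types, is now expressed in terms of PlainObject; Identity() keeps its lazy-expression semantics:

    #include <Eigen/Core>
    using namespace Eigen;

    Matrix3d I3 = Matrix3d::Identity();            // fixed-size identity
    MatrixXd I  = MatrixXd::Identity(4, 4);        // dynamic-size identity
    MatrixXd M  = 2.0 * MatrixXd::Identity(4, 4);  // still an expression until assigned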
Eigen/src/Core/ProductEvaluators.h (5 changes, Normal file → Executable file)
@@ -751,7 +751,6 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
   using Base::m_diagImpl;
   using Base::m_matImpl;
   using Base::coeff;
-  using Base::packet_impl;
   typedef typename Base::Scalar Scalar;
   typedef typename Base::PacketScalar PacketScalar;

@@ -776,7 +775,8 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
   template<int LoadMode>
   EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
   {
-    // NVCC complains about template keyword, so we disable this function in CUDA mode
+    // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
+    // See also similar calls below.
     return this->template packet_impl<LoadMode>(row,col, row,
                  typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
   }
@@ -798,7 +798,6 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
   using Base::m_diagImpl;
   using Base::m_matImpl;
   using Base::coeff;
-  using Base::packet_impl;
   typedef typename Base::Scalar Scalar;
   typedef typename Base::PacketScalar PacketScalar;

|
@@ -53,7 +53,7 @@ struct functor_traits<scalar_random_op<Scalar> >
   * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
   */
 template<typename Derived>
-inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
 DenseBase<Derived>::Random(Index rows, Index cols)
 {
   return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
@@ -84,7 +84,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
   * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
   */
 template<typename Derived>
-inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
 DenseBase<Derived>::Random(Index size)
 {
   return NullaryExpr(size, internal::scalar_random_op<Scalar>());
@@ -110,7 +110,7 @@ DenseBase<Derived>::Random(Index size)
   * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
   */
 template<typename Derived>
-inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
+inline const typename DenseBase<Derived>::RandomReturnType
 DenseBase<Derived>::Random()
 {
   return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
|
@@ -70,10 +70,6 @@ template<typename MatrixType, int Direction> class Reverse
     typedef typename internal::remove_all<MatrixType>::type NestedExpression;
     using Base::IsRowMajor;

-    // next line is necessary because otherwise const version of operator()
-    // is hidden by non-const version defined in this file
-    using Base::operator();
-
   protected:
     enum {
       PacketSize = internal::packet_traits<Scalar>::size,
@@ -101,69 +97,6 @@ template<typename MatrixType, int Direction> class Reverse
       return -m_matrix.innerStride();
     }

-    EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col)
-    {
-      eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
-      return coeffRef(row, col);
-    }
-
-    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
-    {
-      return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
-                                                    ReverseCol ? m_matrix.cols() - col - 1 : col);
-    }
-
-    EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const
-    {
-      return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
-                            ReverseCol ? m_matrix.cols() - col - 1 : col);
-    }
-
-    EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const
-    {
-      return m_matrix.coeff(m_matrix.size() - index - 1);
-    }
-
-    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
-    {
-      return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
-    }
-
-    EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index)
-    {
-      eigen_assert(index >= 0 && index < m_matrix.size());
-      return coeffRef(index);
-    }
-
-    template<int LoadMode>
-    inline const PacketScalar packet(Index row, Index col) const
-    {
-      return reverse_packet::run(m_matrix.template packet<LoadMode>(
-                                    ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
-                                    ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
-    }
-
-    template<int LoadMode>
-    inline void writePacket(Index row, Index col, const PacketScalar& x)
-    {
-      m_matrix.const_cast_derived().template writePacket<LoadMode>(
-                                    ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
-                                    ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
-                                    reverse_packet::run(x));
-    }
-
-    template<int LoadMode>
-    inline const PacketScalar packet(Index index) const
-    {
-      return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
-    }
-
-    template<int LoadMode>
-    inline void writePacket(Index index, const PacketScalar& x)
-    {
-      m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
-    }
-
     EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
     nestedExpression() const
     {
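The coefficient and packet accessors removed above are supplied by the evaluator framework instead, so user-level reverse() is unaffected:

    #include <Eigen/Core>
    using namespace Eigen;

    VectorXd v = VectorXd::LinSpaced(5, 0.0, 4.0);
    VectorXd r = v.reverse();          // 4, 3, 2, 1, 0
    MatrixXd m = MatrixXd::Random(3, 4);
    MatrixXd f = m.reverse();          // both row and column order reversed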
|
@@ -157,6 +157,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::stableNorm() const
 {
   using std::sqrt;
+  using std::abs;
   const Index blockSize = 4096;
   RealScalar scale(0);
   RealScalar invScale(1);
@@ -164,12 +165,18 @@ MatrixBase<Derived>::stableNorm() const
   enum {
     Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
   };
+  typedef typename internal::conditional<Alignment, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, Aligned>,
+                                         typename Base::ConstSegmentReturnType>::type SegmentWrapper;
   Index n = size();

+  if(n==1)
+    return abs(this->coeff(0));
+
   Index bi = internal::first_aligned(derived());
   if (bi>0)
     internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
   for (; bi<n; bi+=blockSize)
-    internal::stable_norm_kernel(this->segment(bi,numext::mini(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
+    internal::stable_norm_kernel(SegmentWrapper(this->segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
   return scale * sqrt(ssq);
 }

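stableNorm() rescales blockwise to avoid underflow and overflow, and with this change a one-element vector short-circuits to abs(). An illustrative comparison (values chosen so that a plain squared-sum accumulation underflows):

    #include <Eigen/Core>
    using namespace Eigen;

    VectorXd v(3);
    v << 1e-170, 2e-170, 2e-170;
    double n1 = v.norm();        // squared terms underflow, so this can return 0
    double n2 = v.stableNorm();  // rescaled accumulation returns a sensible value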
|
@@ -392,6 +392,18 @@ template<typename Scalar>
 struct functor_traits<scalar_quotient1_op<Scalar> >
 { enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };

+template<typename Scalar1, typename Scalar2>
+struct scalar_quotient2_op {
+  typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; }
+  typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
+};
+template<typename Scalar1,typename Scalar2>
+struct functor_traits<scalar_quotient2_op<Scalar1,Scalar2> >
+{ enum { Cost = 2 * NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
+
 // In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
 // where the mixing of different types is handled by scalar_product_traits
 // In particular, real * complex<real> is allowed.
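scalar_quotient2_op divides each coefficient by a fixed scalar of a possibly different type, mirroring scalar_multiple2_op for division. Together with the operator/ added to special_scalar_op_base further below it enables mixed real/complex quotients; a sketch assuming this patch is in place:

    #include <Eigen/Core>
    using namespace Eigen;

    MatrixXd  A = MatrixXd::Random(3, 3);
    MatrixXd  B = A / 2.0;               // same-scalar quotient, as before
    VectorXcf v = VectorXcf::Random(8);
    VectorXcf w = v / 2.0f;              // complex<float> coefficients divided by a real float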
|
Eigen/src/Core/products/GeneralMatrixVector_MKL.h (25 changes, Normal file → Executable file)
@@ -46,38 +46,37 @@ namespace internal {

 // gemv specialization

-template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
-struct general_matrix_vector_product_gemv :
-  general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
+template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
+struct general_matrix_vector_product_gemv;

 #define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
 template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
-struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
+struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
 static void run( \
   Index rows, Index cols, \
-  const Scalar* lhs, Index lhsStride, \
-  const Scalar* rhs, Index rhsIncr, \
+  const const_blas_data_mapper<Scalar,Index,ColMajor> &lhs, \
+  const const_blas_data_mapper<Scalar,Index,RowMajor> &rhs, \
   Scalar* res, Index resIncr, Scalar alpha) \
 { \
   if (ConjugateLhs) { \
-    general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
-      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+    general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,BuiltIn>::run( \
+      rows, cols, lhs, rhs, res, resIncr, alpha); \
   } else { \
     general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
-      rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+      rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
   } \
 } \
 }; \
 template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
-struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
+struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ConjugateRhs,Specialized> { \
 static void run( \
   Index rows, Index cols, \
-  const Scalar* lhs, Index lhsStride, \
-  const Scalar* rhs, Index rhsIncr, \
+  const const_blas_data_mapper<Scalar,Index,RowMajor> &lhs, \
+  const const_blas_data_mapper<Scalar,Index,ColMajor> &rhs, \
   Scalar* res, Index resIncr, Scalar alpha) \
 { \
   general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
-    rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
+    rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
 } \
 }; \

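The specialized GEMV kernels now receive blas_data_mapper arguments and unpack data() and stride() before calling the underlying routine. From user code this is just a matrix-vector product; whether the MKL path is taken depends on the build configuration and scalar type:

    #include <Eigen/Dense>
    using namespace Eigen;

    MatrixXf A = MatrixXf::Random(512, 512);
    VectorXf x = VectorXf::Random(512);
    VectorXf y(512);
    y.noalias() = A * x;  // dispatched to the gemv kernel; an MKL ?gemv call when MKL support is enabled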
|
Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h (4 changes, Normal file → Executable file)
@@ -122,7 +122,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
     Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
     MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
     MKL_INT aStride = aa_tmp.outerStride(); \
-    gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
+    gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
     general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
       rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
 \
@@ -236,7 +236,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
     Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
     MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
     MKL_INT aStride = aa_tmp.outerStride(); \
-    gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
+    gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
     general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
       rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
 \
|
Eigen/src/Core/util/BlasUtil.h (1 change, Normal file → Executable file)
@@ -224,6 +224,7 @@ class blas_data_mapper {
   }

   const Index stride() const { return m_stride; }
+  const Scalar* data() const { return m_data; }

   Index firstAligned(Index size) const {
     if (size_t(m_data)%sizeof(Scalar)) {
|
@@ -213,6 +213,7 @@ template<typename Scalar> struct scalar_identity_op;
 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
 template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
 template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
+template<typename LhsScalar,typename RhsScalar> struct scalar_quotient2_op;

 } // end namespace internal

|
@@ -645,7 +645,7 @@ namespace Eigen {
  // just an empty macro !
  #define EIGEN_EMPTY

- #if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1800 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
+ #if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
  #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
  using Base::operator =;
  #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@@ -427,7 +427,9 @@ struct special_scalar_op_base : public DenseCoeffsBase<Derived>
  {
  // dummy operator* so that the
  // "using special_scalar_op_base::operator*" compiles
- void operator*() const;
+ struct dummy {};
+ void operator*(dummy) const;
+ void operator/(dummy) const;
  };

  template<typename Derived,typename Scalar,typename OtherScalar>

@@ -451,6 +453,16 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public DenseCo
  #endif
  return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar);
  }

+ const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
+ operator/(const OtherScalar& scalar) const
+ {
+ #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
+ EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
+ #endif
+ return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
+ (*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar));
+ }
  };

  template<typename XprType, typename CastType> struct cast_return_type
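For reference, a minimal user-level sketch of what the operator/ overload added above is meant to enable: dividing a real-valued expression by a scalar of a wider type (e.g. a complex scalar), mirroring the existing mixed-type product. This is a sketch written against this patch, not authoritative documentation; the multiplication line already works through scalar_multiple2_op, the division line is the new path.

    #include <Eigen/Dense>
    #include <complex>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Random(2,2);   // real matrix
      std::complex<double> s(2.0, 1.0);     // complex scalar
      MatrixXcd p = m * s;                  // mixed-type product (pre-existing)
      MatrixXcd q = m / s;                  // mixed-type quotient (what this change adds)
      return 0;
    }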
@@ -486,10 +486,11 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag

  while (end>0)
  {
+ EIGEN_ASM_COMMENT("beginabs");
  for (Index i = start; i<end; ++i)
  if (internal::isMuchSmallerThan(abs(subdiag[i]),(abs(diag[i])+abs(diag[i+1]))) || abs(subdiag[i]) <= considerAsZero)
  subdiag[i] = 0;
+ EIGEN_ASM_COMMENT("endabs");
  // find the largest unreduced block
  while (end>0 && subdiag[end-1]==0)
  {
@@ -464,9 +464,10 @@ struct tridiagonalization_inplace_selector<MatrixType,3,false>
  static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
  {
  using std::sqrt;
+ const RealScalar tol = (std::numeric_limits<RealScalar>::min)();
  diag[0] = mat(0,0);
  RealScalar v1norm2 = numext::abs2(mat(2,0));
- if(v1norm2 == RealScalar(0))
+ if(v1norm2 <= tol)
  {
  diag[1] = mat(1,1);
  diag[2] = mat(2,2);
@@ -18,6 +18,10 @@ namespace Eigen {
  * \returns the cross product of \c *this and \a other
  *
  * Here is a very good explanation of cross-product: http://xkcd.com/199/
+ *
+ * With complex numbers, the cross product is implemented as
+ * \f$ (\mathbf{a}+i\mathbf{b}) \times (\mathbf{c}+i\mathbf{d}) = (\mathbf{a} \times \mathbf{c} - \mathbf{b} \times \mathbf{d}) - i(\mathbf{a} \times \mathbf{d} - \mathbf{b} \times \mathbf{c})\f$
+ *
  * \sa MatrixBase::cross3()
  */
  template<typename Derived>
@@ -75,8 +75,9 @@ void MatrixBase<Derived>::makeHouseholder(

  RealScalar tailSqNorm = size()==1 ? RealScalar(0) : tail.squaredNorm();
  Scalar c0 = coeff(0);
+ const RealScalar tol = (std::numeric_limits<RealScalar>::min)();

- if(tailSqNorm == RealScalar(0) && numext::imag(c0)==RealScalar(0))
+ if(tailSqNorm <= tol && numext::abs2(numext::imag(c0))<=tol)
  {
  tau = RealScalar(0);
  beta = numext::real(c0);
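Since makeHouseholder() is a public MatrixBase method, the relaxed zero test above (smallest normalized value instead of exact zero) is observable from user code. A small usage sketch under arbitrary input values:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      VectorXd v = VectorXd::Random(5);
      VectorXd essential(4);       // Householder vector without its first entry
      double tau, beta;
      v.makeHouseholder(essential, tau, beta);
      // Applying H = I - tau * w * w^T with w = [1; essential] to v
      // maps it to [beta, 0, 0, 0, 0]^T up to rounding.
      return 0;
    }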
@@ -136,6 +136,12 @@ struct traits<BiCGSTAB<_MatrixType,_Preconditioner> >
  * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
  * and NumTraits<Scalar>::epsilon() for the tolerance.
  *
+ * The tolerance corresponds to the relative residual error: |Ax-b|/|b|
+ *
+ * \b Performance: when using sparse matrices, best performance is achied for a row-major sparse matrix format.
+ * Moreover, in this case multi-threading can be exploited if the user code is compiled with OpenMP enabled.
+ * See \ref TopicMultiThreading for details.
+ *
  * This class can be used as the direct solver classes. Here is a typical usage example:
  * \include BiCGSTAB_simple.cpp
  *
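The new performance note recommends a row-major sparse matrix for BiCGSTAB. A hedged sketch of that recommendation (the system here is a placeholder; the multi-threaded path additionally assumes the code is compiled with OpenMP enabled):

    #include <Eigen/Sparse>
    #include <Eigen/IterativeLinearSolvers>
    using namespace Eigen;

    int main()
    {
      int n = 1000;
      SparseMatrix<double,RowMajor> A(n,n);   // row-major storage, as the doc suggests
      VectorXd b = VectorXd::Ones(n), x;
      A.setIdentity();                        // placeholder system; fill A for a real problem
      BiCGSTAB<SparseMatrix<double,RowMajor> > solver;
      solver.compute(A);
      x = solver.solve(b);
      return 0;
    }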
@@ -114,20 +114,28 @@ struct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
  *
  * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix.
  * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower,
- * Upper, or Lower|Upper in which the full matrix entries will be considered. Default is Lower.
+ * \c Upper, or \c Lower|Upper in which the full matrix entries will be considered.
+ * Default is \c Lower, best performance is \c Lower|Upper.
  * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
  *
  * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
  * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
  * and NumTraits<Scalar>::epsilon() for the tolerance.
  *
+ * The tolerance corresponds to the relative residual error: |Ax-b|/|b|
+ *
+ * \b Performance: Even though the default value of \c _UpLo is \c Lower, significantly higher performance is
+ * achieved when using a complete matrix and \b Lower|Upper as the \a _UpLo template parameter. Moreover, in this
+ * case multi-threading can be exploited if the user code is compiled with OpenMP enabled.
+ * See \ref TopicMultiThreading for details.
+ *
  * This class can be used as the direct solver classes. Here is a typical usage example:
  \code
  int n = 10000;
  VectorXd x(n), b(n);
  SparseMatrix<double> A(n,n);
  // fill A and b
- ConjugateGradient<SparseMatrix<double> > cg;
+ ConjugateGradient<SparseMatrix<double>, Lower|Upper> cg;
  cg.compute(A);
  x = cg.solve(b);
  std::cout << "#iterations: " << cg.iterations() << std::endl;
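For completeness, a sketch of the variant that the new \b Performance paragraph describes: a fully stored symmetric matrix, Lower|Upper, row-major storage, and (optionally) OpenMP. The matrix must then contain both triangles, not just one. Values and sizes below are placeholders.

    #include <Eigen/Sparse>
    #include <Eigen/IterativeLinearSolvers>
    using namespace Eigen;

    int main()
    {
      int n = 1000;
      SparseMatrix<double,RowMajor> A(n,n);   // full symmetric matrix, both triangles stored
      VectorXd b = VectorXd::Ones(n), x;
      A.setIdentity();                        // placeholder; fill with a real symmetric matrix
      ConjugateGradient<SparseMatrix<double,RowMajor>, Lower|Upper> cg;
      cg.compute(A);
      x = cg.solve(b);
      return 0;
    }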
@@ -183,10 +191,13 @@ public:
  template<typename Rhs,typename Dest>
  void _solve_with_guess_impl(const Rhs& b, Dest& x) const
  {
+ typedef Ref<const MatrixType> MatRef;
+ typedef typename internal::conditional<UpLo==(Lower|Upper) && (!MatrixType::IsRowMajor) && (!NumTraits<Scalar>::IsComplex),
+ Transpose<const MatRef>, MatRef const&>::type RowMajorWrapper;
  typedef typename internal::conditional<UpLo==(Lower|Upper),
- Ref<const MatrixType>&,
- typename Ref<const MatrixType>::template ConstSelfAdjointViewReturnType<UpLo>::Type
- >::type MatrixWrapperType;
+ RowMajorWrapper,
+ typename MatRef::template ConstSelfAdjointViewReturnType<UpLo>::Type
+ >::type SelfAdjointWrapper;
  m_iterations = Base::maxIterations();
  m_error = Base::m_tolerance;

@@ -196,7 +207,8 @@ public:
  m_error = Base::m_tolerance;

  typename Dest::ColXpr xj(x,j);
- internal::conjugate_gradient(MatrixWrapperType(mp_matrix), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error);
+ RowMajorWrapper row_mat(mp_matrix);
+ internal::conjugate_gradient(SelfAdjointWrapper(row_mat), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error);
  }

  m_isInitialized = true;
@@ -126,10 +126,16 @@ public:
  /** \internal */
  Index cols() const { return mp_matrix.cols(); }

- /** \returns the tolerance threshold used by the stopping criteria */
+ /** \returns the tolerance threshold used by the stopping criteria.
+  * \sa setTolerance()
+  */
  RealScalar tolerance() const { return m_tolerance; }

- /** Sets the tolerance threshold used by the stopping criteria */
+ /** Sets the tolerance threshold used by the stopping criteria.
+  *
+  * This value is used as an upper bound to the relative residual error: |Ax-b|/|b|.
+  * The default value is the machine precision given by NumTraits<Scalar>::epsilon()
+  */
  Derived& setTolerance(const RealScalar& tolerance)
  {
  m_tolerance = tolerance;

@@ -167,7 +173,9 @@ public:
  return m_iterations;
  }

- /** \returns the tolerance error reached during the last solve */
+ /** \returns the tolerance error reached during the last solve.
+  * It is a close approximation of the true relative residual error |Ax-b|/|b|.
+  */
  RealScalar error() const
  {
  eigen_assert(m_isInitialized && "ConjugateGradient is not initialized.");
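The clarified documentation ties tolerance() and error() to the relative residual |Ax-b|/|b|. A small sketch of the corresponding calls; any iterative solver built on IterativeSolverBase behaves the same way, and the system below is only a placeholder:

    #include <Eigen/Sparse>
    #include <Eigen/IterativeLinearSolvers>
    #include <iostream>
    using namespace Eigen;

    int main()
    {
      int n = 500;
      SparseMatrix<double> A(n,n);
      A.setIdentity();                          // placeholder system
      VectorXd b = VectorXd::Ones(n);
      ConjugateGradient<SparseMatrix<double>, Lower|Upper> cg;
      cg.setTolerance(1e-8);                    // upper bound on |Ax-b|/|b|
      cg.setMaxIterations(200);
      cg.compute(A);
      VectorXd x = cg.solve(b);
      std::cout << "estimated relative residual: " << cg.error() << "\n";
      return 0;
    }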
Eigen/src/PardisoSupport/PardisoSupport.h (49 changes, Normal file → Executable file)
@@ -54,7 +54,7 @@ namespace internal
  template<>
  struct pardiso_run_selector<long long int>
  {
- typedef long long int IndexTypeType;
+ typedef long long int IndexType;
  static IndexType run( _MKL_DSS_HANDLE_t pt, IndexType maxfct, IndexType mnum, IndexType type, IndexType phase, IndexType n, void *a,
  IndexType *ia, IndexType *ja, IndexType *perm, IndexType nrhs, IndexType *iparm, IndexType msglvl, void *b, void *x)
  {

@@ -93,19 +93,19 @@ namespace internal
  typedef typename _MatrixType::StorageIndex StorageIndex;
  };

- }
+ } // end namespace internal

  template<class Derived>
- class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>
+ class PardisoImpl : public SparseSolverBase<Derived>
  {
  protected:
- typedef SparseSolveBase<PardisoImpl<Derived> Base;
+ typedef SparseSolverBase<Derived> Base;
  using Base::derived;
  using Base::m_isInitialized;

  typedef internal::pardiso_traits<Derived> Traits;
  public:
- using base::_solve_impl;
+ using Base::_solve_impl;

  typedef typename Traits::MatrixType MatrixType;
  typedef typename Traits::Scalar Scalar;

@@ -173,16 +173,17 @@ class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>

  Derived& compute(const MatrixType& matrix);

- template<typename BDerived, typename XDerived>
- bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;
+ template<typename Rhs,typename Dest>
+ void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;

  protected:
  void pardisoRelease()
  {
  if(m_isInitialized) // Factorization ran at least once
  {
- internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, m_size, 0, 0, 0, m_perm.data(), 0,
- m_iparm.data(), m_msglvl, 0, 0);
+ internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0,
+ m_iparm.data(), m_msglvl, NULL, NULL);
+ m_isInitialized = false;
  }
  }

@@ -217,12 +218,14 @@ class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>
  m_iparm[27] = (sizeof(RealScalar) == 4) ? 1 : 0;
  m_iparm[34] = 1; // C indexing
  m_iparm[59] = 1; // Automatic switch between In-Core and Out-of-Core modes

+ memset(m_pt, 0, sizeof(m_pt));
  }

  protected:
  // cached data to reduce reallocation, etc.

- void manageErrorCode(Index error)
+ void manageErrorCode(Index error) const
  {
  switch(error)
  {

@@ -239,7 +242,7 @@ class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>
  }

  mutable SparseMatrixType m_matrix;
- ComputationInfo m_info;
+ mutable ComputationInfo m_info;
  bool m_analysisIsOk, m_factorizationIsOk;
  Index m_type, m_msglvl;
  mutable void *m_pt[64];

@@ -256,7 +259,6 @@ Derived& PardisoImpl<Derived>::compute(const MatrixType& a)
  eigen_assert(a.rows() == a.cols());

  pardisoRelease();
- memset(m_pt, 0, sizeof(m_pt));
  m_perm.setZero(m_size);
  derived().getMatrix(a);

@@ -279,7 +281,6 @@ Derived& PardisoImpl<Derived>::analyzePattern(const MatrixType& a)
  eigen_assert(m_size == a.cols());

  pardisoRelease();
- memset(m_pt, 0, sizeof(m_pt));
  m_perm.setZero(m_size);
  derived().getMatrix(a);

@@ -313,12 +314,15 @@ Derived& PardisoImpl<Derived>::factorize(const MatrixType& a)
  return derived();
  }

- template<class Base>
+ template<class Derived>
  template<typename BDerived,typename XDerived>
- bool PardisoImpl<Base>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const
+ void PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const
  {
  if(m_iparm[0] == 0) // Factorization was not computed
- return false;
+ {
+ m_info = InvalidInput;
+ return;
+ }

  //Index n = m_matrix.rows();
  Index nrhs = Index(b.cols());

@@ -353,7 +357,7 @@ bool PardisoImpl<Base>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XD
  m_perm.data(), nrhs, m_iparm.data(), m_msglvl,
  rhs_ptr, x.derived().data());

- return error==0;
+ manageErrorCode(error);
  }


@@ -373,7 +377,7 @@ template<typename MatrixType>
  class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
  {
  protected:
- typedef PardisoImpl< PardisoLU<MatrixType> > Base;
+ typedef PardisoImpl<PardisoLU> Base;
  typedef typename Base::Scalar Scalar;
  typedef typename Base::RealScalar RealScalar;
  using Base::pardisoInit;

@@ -401,6 +405,7 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
  void getMatrix(const MatrixType& matrix)
  {
  m_matrix = matrix;
+ m_matrix.makeCompressed();
  }
  };

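The PardisoImpl changes above (SparseSolverBase base class, void _solve_impl reporting through m_info, makeCompressed() on the cached matrix) do not alter the user-facing calls. A hedged sketch of typical usage, assuming Eigen is built and linked against Intel MKL so that the PardisoSupport module is available:

    #include <Eigen/Sparse>
    #include <Eigen/PardisoSupport>   // requires MKL at build/link time
    using namespace Eigen;

    int solve_with_pardiso(const SparseMatrix<double>& A, const VectorXd& b, VectorXd& x)
    {
      PardisoLU<SparseMatrix<double> > solver;
      solver.compute(A);              // analyzePattern + factorize
      if(solver.info() != Success)
        return 1;                     // failure reported via manageErrorCode() above
      x = solver.solve(b);
      return solver.info() == Success ? 0 : 1;
    }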
@@ -424,7 +429,6 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
  protected:
  typedef PardisoImpl< PardisoLLT<MatrixType,_UpLo> > Base;
  typedef typename Base::Scalar Scalar;
- typedef typename Base::StorageIndex StorageIndex;
  typedef typename Base::RealScalar RealScalar;
  using Base::pardisoInit;
  using Base::m_matrix;

@@ -432,9 +436,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >

  public:

+ typedef typename Base::StorageIndex StorageIndex;
  enum { UpLo = _UpLo };
  using Base::compute;
- using Base::solve;

  PardisoLLT()
  : Base()

@@ -457,6 +461,7 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
  PermutationMatrix<Dynamic,Dynamic,StorageIndex> p_null;
  m_matrix.resize(matrix.rows(), matrix.cols());
  m_matrix.template selfadjointView<Upper>() = matrix.template selfadjointView<UpLo>().twistedBy(p_null);
+ m_matrix.makeCompressed();
  }
  };

@@ -482,7 +487,6 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
  protected:
  typedef PardisoImpl< PardisoLDLT<MatrixType,Options> > Base;
  typedef typename Base::Scalar Scalar;
- typedef typename Base::StorageIndex StorageIndex;
  typedef typename Base::RealScalar RealScalar;
  using Base::pardisoInit;
  using Base::m_matrix;

@@ -490,8 +494,8 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >

  public:

+ typedef typename Base::StorageIndex StorageIndex;
  using Base::compute;
- using Base::solve;
  enum { UpLo = Options&(Upper|Lower) };

  PardisoLDLT()

@@ -513,6 +517,7 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
  PermutationMatrix<Dynamic,Dynamic,StorageIndex> p_null;
  m_matrix.resize(matrix.rows(), matrix.cols());
  m_matrix.template selfadjointView<Upper>() = matrix.template selfadjointView<UpLo>().twistedBy(p_null);
+ m_matrix.makeCompressed();
  }
  };

@@ -50,9 +50,9 @@ class CompressedStorage

  CompressedStorage& operator=(const CompressedStorage& other)
  {
+ resize(other.size());
  if(other.size()>0)
  {
- resize(other.size());
  internal::smart_copy(other.m_values, other.m_values + m_size, m_values);
  internal::smart_copy(other.m_indices, other.m_indices + m_size, m_indices);
  }
@@ -16,8 +16,7 @@ template<typename Derived>
  template<typename OtherDerived>
  Derived& SparseMatrixBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
  {
- // TODO use the evaluator mechanism
- other.derived().evalTo(derived());
+ internal::call_assignment_no_alias(derived(), other.derived());
  return derived();
  }

@@ -182,6 +181,39 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
  }
  };

+ struct Diagonal2Sparse {};
+
+ template<> struct AssignmentKind<SparseShape,DiagonalShape> { typedef Diagonal2Sparse Kind; };
+
+ template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse, Scalar>
+ {
+ typedef typename DstXprType::StorageIndex StorageIndex;
+ typedef Array<StorageIndex,Dynamic,1> ArrayXI;
+ typedef Array<Scalar,Dynamic,1> ArrayXS;
+ template<int Options>
+ static void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ {
+ Index size = src.diagonal().size();
+ dst.makeCompressed();
+ dst.resizeNonZeros(size);
+ Map<ArrayXI>(dst.innerIndexPtr(), size).setLinSpaced(0,StorageIndex(size)-1);
+ Map<ArrayXI>(dst.outerIndexPtr(), size+1).setLinSpaced(0,StorageIndex(size));
+ Map<ArrayXS>(dst.valuePtr(), size) = src.diagonal();
+ }
+
+ template<typename DstDerived>
+ static void run(SparseMatrixBase<DstDerived> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ {
+ dst.diagonal() = src.diagonal();
+ }
+
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/)
+ { dst.diagonal() += src.diagonal(); }
+
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/)
+ { dst.diagonal() -= src.diagonal(); }
+ };
  } // end namespace internal

  } // end namespace Eigen
@@ -390,6 +390,22 @@ SparseMatrixBase<Derived>::operator+=(const SparseMatrixBase<OtherDerived>& othe
  return derived() = derived() + other.derived();
  }

+ template<typename Derived>
+ template<typename OtherDerived>
+ Derived& SparseMatrixBase<Derived>::operator+=(const DiagonalBase<OtherDerived>& other)
+ {
+ call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>());
+ return derived();
+ }
+
+ template<typename Derived>
+ template<typename OtherDerived>
+ Derived& SparseMatrixBase<Derived>::operator-=(const DiagonalBase<OtherDerived>& other)
+ {
+ call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>());
+ return derived();
+ }
+
  template<typename Derived>
  template<typename OtherDerived>
  EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE
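The two new overloads above are what make in-place updates of a sparse matrix by a diagonal expression legal. A minimal sketch (sizes and values are arbitrary); note that through the Diagonal2Sparse add/sub path the diagonal coefficients must already be stored in the sparse matrix, which setIdentity() guarantees here:

    #include <Eigen/Sparse>
    using namespace Eigen;

    int main()
    {
      int n = 4;
      SparseMatrix<double> A(n,n);
      A.setIdentity();                                 // diagonal entries now exist
      VectorXd d = VectorXd::LinSpaced(n, 1.0, 4.0);
      A += d.asDiagonal();                             // new operator+=(DiagonalBase)
      A -= (0.5 * VectorXd::Ones(n)).asDiagonal();     // new operator-=(DiagonalBase)
      return 0;
    }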
@@ -30,23 +30,48 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
  typedef typename internal::remove_all<DenseRhsType>::type Rhs;
  typedef typename internal::remove_all<DenseResType>::type Res;
  typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
+ typedef typename evaluator<Lhs>::type LhsEval;
  static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
  {
- typename evaluator<Lhs>::type lhsEval(lhs);
+ LhsEval lhsEval(lhs);
+
+ Index n = lhs.outerSize();
+ #ifdef EIGEN_HAS_OPENMP
+ Eigen::initParallel();
+ Index threads = Eigen::nbThreads();
+ #endif
+
  for(Index c=0; c<rhs.cols(); ++c)
  {
- Index n = lhs.outerSize();
- for(Index j=0; j<n; ++j)
+ #ifdef EIGEN_HAS_OPENMP
+ // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
+ // It basically represents the minimal amount of work to be done to be worth it.
+ if(threads>1 && lhs.nonZeros() > 20000)
+ {
+ #pragma omp parallel for schedule(static) num_threads(threads)
+ for(Index i=0; i<n; ++i)
+ processRow(lhsEval,rhs,res,alpha,i,c);
+ }
+ else
+ #endif
+ {
+ for(Index i=0; i<n; ++i)
+ processRow(lhsEval,rhs,res,alpha,i,c);
+ }
+ }
+ }
+
+ static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha, Index i, Index col)
  {
  typename Res::Scalar tmp(0);
- for(LhsInnerIterator it(lhsEval,j); it ;++it)
- tmp += it.value() * rhs.coeff(it.index(),c);
- res.coeffRef(j,c) += alpha * tmp;
- }
- }
+ for(LhsInnerIterator it(lhsEval,i); it ;++it)
+ tmp += it.value() * rhs.coeff(it.index(),col);
+ res.coeffRef(i,col) += alpha * tmp;
  }
  };

+ // FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
  template<typename T1, typename T2/*, int _Options, typename _StrideType*/>
  struct scalar_product_traits<T1, Ref<T2/*, _Options, _StrideType*/> >
  {
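The parallel path above only kicks in when the library is compiled with OpenMP, more than one thread is available, and the left-hand side has more than 20000 non-zeros. A sketch of how the thread count is typically controlled from user code (compile with -fopenmp or the equivalent flag; the matrix below is only a placeholder and is too small to trigger the threshold):

    #include <Eigen/Core>
    #include <Eigen/Sparse>
    using namespace Eigen;

    int main()
    {
      initParallel();                          // optional explicit init
      setNbThreads(4);                         // threads used by parallelized kernels
      int n = 10000;
      SparseMatrix<double,RowMajor> A(n,n);    // this kernel handles the row-major case
      A.setIdentity();                         // placeholder; a real A should exceed 20000 non-zeros
      MatrixXd B = MatrixXd::Random(n, 8), C;
      C = A * B;                               // sparse * dense, parallel when the conditions hold
      return 0;
    }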
@@ -97,8 +97,8 @@ class SparseMatrix
  using Base::isCompressed;
  using Base::nonZeros;
  _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix)
- EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=)
- EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=)
+ using Base::operator+=;
+ using Base::operator-=;

  typedef MappedSparseMatrix<Scalar,Flags> Map;
  typedef Diagonal<SparseMatrix> DiagonalReturnType;
@@ -696,6 +696,15 @@ class SparseMatrix
  other.evalTo(*this);
  }

+ /** \brief Copy constructor with in-place evaluation */
+ template<typename OtherDerived>
+ explicit SparseMatrix(const DiagonalBase<OtherDerived>& other)
+ : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)
+ {
+ check_template_parameters();
+ *this = other.derived();
+ }
+
  /** Swaps the content of two sparse matrices of the same type.
  * This is a fast operation that simply swaps the underlying pointers and parameters. */
  inline void swap(SparseMatrix& other)
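The new explicit constructor above allows a SparseMatrix to be built directly from a diagonal expression. A minimal sketch, assuming the Diagonal2Sparse assignment path introduced earlier in this commit is in place:

    #include <Eigen/Sparse>
    using namespace Eigen;

    int main()
    {
      VectorXd d = VectorXd::LinSpaced(5, 1.0, 5.0);
      SparseMatrix<double> D(d.asDiagonal());   // 5x5 sparse matrix with d on its diagonal
      // plain assignment (D = d.asDiagonal()) is expected to go through the same path
      return 0;
    }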
@@ -243,6 +243,11 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
  template<typename OtherDerived>
  Derived& operator-=(const SparseMatrixBase<OtherDerived>& other);

+ template<typename OtherDerived>
+ Derived& operator+=(const DiagonalBase<OtherDerived>& other);
+ template<typename OtherDerived>
+ Derived& operator-=(const DiagonalBase<OtherDerived>& other);
+
  Derived& operator*=(const Scalar& other);
  Derived& operator/=(const Scalar& other);

@@ -367,6 +372,8 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
  static inline StorageIndex convert_index(const Index idx) {
  return internal::convert_index<StorageIndex>(idx);
  }
+ private:
+ template<typename Dest> void evalTo(Dest &) const;
  };

  } // end namespace Eigen
@@ -45,8 +45,13 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
  {
  public:

- enum { Mode = _Mode };
+ enum {
+ Mode = _Mode,
+ RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime
+ };
+
+ typedef EigenBase<SparseSelfAdjointView> Base;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::StorageIndex StorageIndex;
  typedef Matrix<StorageIndex,Dynamic,1> VectorI;

@@ -116,20 +121,6 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
  template<typename DerivedU>
  SparseSelfAdjointView& rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));

- /** \internal triggered by sparse_matrix = SparseSelfadjointView; */
- template<typename DestScalar,int StorageOrder> void evalTo(SparseMatrix<DestScalar,StorageOrder,StorageIndex>& _dest) const
- {
- internal::permute_symm_to_fullsymm<Mode>(m_matrix, _dest);
- }
-
- template<typename DestScalar> void evalTo(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& _dest) const
- {
- // TODO directly evaluate into _dest;
- SparseMatrix<DestScalar,ColMajor,StorageIndex> tmp(_dest.rows(),_dest.cols());
- internal::permute_symm_to_fullsymm<Mode>(m_matrix, tmp);
- _dest = tmp;
- }
-
  /** \returns an expression of P H P^-1 */
  // TODO implement twists in a more evaluator friendly fashion
  SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm) const

@@ -140,7 +131,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
  template<typename SrcMatrixType,int SrcMode>
  SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcMode>& permutedMatrix)
  {
- permutedMatrix.evalTo(*this);
+ internal::call_assignment_no_alias_no_transpose(*this, permutedMatrix);
  return *this;
  }

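The operator= rewritten above is the one exercised when a symmetric sparse matrix is reordered through a selfadjoint view. A hedged sketch of that use; the permutation here is an identity placeholder, a real one would come from an ordering method such as AMD:

    #include <Eigen/Sparse>
    using namespace Eigen;

    int main()
    {
      int n = 6;
      SparseMatrix<double> A(n,n), B(n,n);
      A.setIdentity();                               // placeholder symmetric matrix (lower part used)
      PermutationMatrix<Dynamic,Dynamic,int> P(n);
      P.setIdentity();
      // Compute P A P^T, keeping only the upper triangle of the result:
      B.selfadjointView<Upper>() = A.selfadjointView<Lower>().twistedBy(P);
      return 0;
    }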
@@ -157,11 +148,21 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
  return *this = src.twistedBy(pnull);
  }

+ void resize(Index rows, Index cols)
+ {
+ EIGEN_ONLY_USED_FOR_DEBUG(rows);
+ EIGEN_ONLY_USED_FOR_DEBUG(cols);
+ eigen_assert(rows == this->rows() && cols == this->cols()
+ && "SparseSelfadjointView::resize() does not actually allow to resize.");
+ }
+
  protected:

  typename MatrixType::Nested m_matrix;
  //mutable VectorI m_countPerRow;
  //mutable VectorI m_countPerCol;
+ private:
+ template<typename Dest> void evalTo(Dest &) const;
  };

  /***************************************************************************

@@ -200,6 +201,47 @@ SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<Derive
  return *this;
  }

+ namespace internal {
+
+ // TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
+ // in the future selfadjoint-ness should be defined by the expression traits
+ // such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
+ template<typename MatrixType, unsigned int Mode>
+ struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
+ {
+ typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+ typedef SparseSelfAdjointShape Shape;
+
+ static const int AssumeAliasing = 0;
+ };
+
+ struct SparseSelfAdjoint2Sparse {};
+
+ template<> struct AssignmentKind<SparseShape,SparseSelfAdjointShape> { typedef SparseSelfAdjoint2Sparse Kind; };
+ template<> struct AssignmentKind<SparseSelfAdjointShape,SparseShape> { typedef Sparse2Sparse Kind; };
+
+ template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
+ struct Assignment<DstXprType, SrcXprType, Functor, SparseSelfAdjoint2Sparse, Scalar>
+ {
+ typedef typename DstXprType::StorageIndex StorageIndex;
+ template<typename DestScalar,int StorageOrder>
+ static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ {
+ internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);
+ }
+
+ template<typename DestScalar>
+ static void run(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
+ {
+ // TODO directly evaluate into dst;
+ SparseMatrix<DestScalar,ColMajor,StorageIndex> tmp(dst.rows(),dst.cols());
+ internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), tmp);
+ dst = tmp;
+ }
+ };
+
+ } // end namespace internal
+
  /***************************************************************************
  * Implementation of sparse self-adjoint time dense matrix
  ***************************************************************************/
@@ -253,17 +295,6 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
  }
  }

- // TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
- // in the future selfadjoint-ness should be defined by the expression traits
- // such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
- template<typename MatrixType, unsigned int Mode>
- struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
- {
- typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
- typedef SparseSelfAdjointShape Shape;
-
- static const int AssumeAliasing = 0;
- };
-
  template<typename LhsView, typename Rhs, int ProductType>
  struct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType>
@@ -519,12 +550,16 @@ class SparseSymmetricPermutationProduct
  public:
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::StorageIndex StorageIndex;
+ enum {
+ RowsAtCompileTime = internal::traits<SparseSymmetricPermutationProduct>::RowsAtCompileTime,
+ ColsAtCompileTime = internal::traits<SparseSymmetricPermutationProduct>::ColsAtCompileTime
+ };
  protected:
  typedef PermutationMatrix<Dynamic,Dynamic,StorageIndex> Perm;
  public:
  typedef Matrix<StorageIndex,Dynamic,1> VectorI;
  typedef typename MatrixType::Nested MatrixTypeNested;
- typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
+ typedef typename internal::remove_all<MatrixTypeNested>::type NestedExpression;

  SparseSymmetricPermutationProduct(const MatrixType& mat, const Perm& perm)
  : m_matrix(mat), m_perm(perm)

@@ -533,19 +568,8 @@ class SparseSymmetricPermutationProduct
  inline Index rows() const { return m_matrix.rows(); }
  inline Index cols() const { return m_matrix.cols(); }

- template<typename DestScalar, int Options, typename DstIndex>
- void evalTo(SparseMatrix<DestScalar,Options,DstIndex>& _dest) const
- {
- // internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
- SparseMatrix<DestScalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
- internal::permute_symm_to_fullsymm<Mode>(m_matrix,tmp,m_perm.indices().data());
- _dest = tmp;
- }
-
- template<typename DestType,unsigned int DestMode> void evalTo(SparseSelfAdjointView<DestType,DestMode>& dest) const
- {
- internal::permute_symm_to_symm<Mode,DestMode>(m_matrix,dest.matrix(),m_perm.indices().data());
- }
+ const NestedExpression& matrix() const { return m_matrix; }
+ const Perm& perm() const { return m_perm; }

  protected:
  MatrixTypeNested m_matrix;
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
template<typename DstXprType, typename MatrixType, int Mode, typename Scalar>
|
||||||
|
struct Assignment<DstXprType, SparseSymmetricPermutationProduct<MatrixType,Mode>, internal::assign_op<Scalar>, Sparse2Sparse>
|
||||||
|
{
|
||||||
|
typedef SparseSymmetricPermutationProduct<MatrixType,Mode> SrcXprType;
|
||||||
|
typedef typename DstXprType::StorageIndex DstIndex;
|
||||||
|
template<int Options>
|
||||||
|
static void run(SparseMatrix<Scalar,Options,DstIndex> &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||||
|
{
|
||||||
|
// internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
|
||||||
|
SparseMatrix<Scalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
|
||||||
|
internal::permute_symm_to_fullsymm<Mode>(src.matrix(),tmp,src.perm().indices().data());
|
||||||
|
dst = tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename DestType,unsigned int DestMode>
|
||||||
|
static void run(SparseSelfAdjointView<DestType,DestMode>& dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||||
|
{
|
||||||
|
internal::permute_symm_to_symm<Mode,DestMode>(src.matrix(),dst.matrix(),src.perm().indices().data());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_SPARSE_SELFADJOINTVIEW_H
|
#endif // EIGEN_SPARSE_SELFADJOINTVIEW_H
|
||||||
|
@@ -37,11 +37,7 @@ EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \
  }

  #define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
- EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) \
- EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, +=) \
- EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, -=) \
- EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, *=) \
- EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=)
+ EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =)

  // TODO this is mostly the same as EIGEN_GENERIC_PUBLIC_INTERFACE
  #define _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \
@@ -152,8 +152,8 @@ Index SparseLUImpl<Scalar,StorageIndex>::memInit(Index m, Index n, Index annz, I
  {
  Index& num_expansions = glu.num_expansions; //No memory expansions so far
  num_expansions = 0;
- glu.nzumax = glu.nzlumax = (std::min)(fillratio * annz / n, m) * n; // estimated number of nonzeros in U
- glu.nzlmax = (std::max)(Index(4), fillratio) * annz / 4; // estimated nnz in L factor
+ glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U
+ glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated nnz in L factor
  // Return the estimated size to the user if necessary
  Index tempSpace;
  tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
@@ -23,6 +23,10 @@ namespace internal {
  typedef typename SparseQRType::MatrixType ReturnType;
  typedef typename ReturnType::StorageIndex StorageIndex;
  typedef typename ReturnType::StorageKind StorageKind;
+ enum {
+ RowsAtCompileTime = Dynamic,
+ ColsAtCompileTime = Dynamic
+ };
  };
  template <typename SparseQRType> struct traits<SparseQRMatrixQTransposeReturnType<SparseQRType> >
  {
@@ -235,8 +239,9 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
  return m_info;
  }

- protected:
- inline void sort_matrix_Q()
+ /** \internal */
+ inline void _sort_matrix_Q()
  {
  if(this->m_isQSorted) return;
  // The matrix Q is sorted during the transposition

@@ -267,7 +272,6 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
  bool m_isEtreeOk; // whether the elimination tree match the initial input matrix

  template <typename, typename > friend struct SparseQR_QProduct;
- template <typename > friend struct SparseQRMatrixQReturnType;

  };

@@ -635,6 +639,10 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
  {
  typedef typename SparseQRType::Scalar Scalar;
  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
+ enum {
+ RowsAtCompileTime = Dynamic,
+ ColsAtCompileTime = Dynamic
+ };
  explicit SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {}
  template<typename Derived>
  SparseQR_QProduct<SparseQRType, Derived> operator*(const MatrixBase<Derived>& other)
@@ -652,19 +660,6 @@
  {
  return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
  }
- template<typename Dest> void evalTo(MatrixBase<Dest>& dest) const
- {
- dest.derived() = m_qr.matrixQ() * Dest::Identity(m_qr.rows(), m_qr.rows());
- }
- template<typename Dest> void evalTo(SparseMatrixBase<Dest>& dest) const
- {
- Dest idMat(m_qr.rows(), m_qr.rows());
- idMat.setIdentity();
- // Sort the sparse householder reflectors if needed
- const_cast<SparseQRType *>(&m_qr)->sort_matrix_Q();
- dest.derived() = SparseQR_QProduct<SparseQRType, Dest>(m_qr, idMat, false);
- }
-
  const SparseQRType& m_qr;
  };

@@ -680,6 +675,47 @@ struct SparseQRMatrixQTransposeReturnType
  const SparseQRType& m_qr;
  };

+ namespace internal {
+
+ template<typename SparseQRType>
+ struct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> >
+ {
+ typedef typename SparseQRType::MatrixType MatrixType;
+ typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
+ typedef SparseShape Shape;
+ static const int AssumeAliasing = 0;
+ };
+
+ template< typename DstXprType, typename SparseQRType>
+ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar>, Sparse2Sparse>
+ {
+ typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;
+ typedef typename DstXprType::Scalar Scalar;
+ typedef typename DstXprType::StorageIndex StorageIndex;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &/*func*/)
+ {
+ typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows());
+ idMat.setIdentity();
+ // Sort the sparse householder reflectors if needed
+ const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q();
+ dst = SparseQR_QProduct<SparseQRType, DstXprType>(src.m_qr, idMat, false);
+ }
+ };
+
+ template< typename DstXprType, typename SparseQRType>
+ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar>, Sparse2Dense>
+ {
+ typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;
+ typedef typename DstXprType::Scalar Scalar;
+ typedef typename DstXprType::StorageIndex StorageIndex;
+ static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &/*func*/)
+ {
+ dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows());
+ }
+ };
+
+ } // end namespace internal
+
  } // end namespace Eigen

  #endif
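The two Assignment specializations above replace the old evalTo() members, so materializing Q keeps working through plain assignment to either a sparse or a dense destination. A hedged sketch; the ordering type and the factored matrix below are placeholders:

    #include <Eigen/Sparse>
    #include <Eigen/OrderingMethods>
    #include <Eigen/SparseQR>
    using namespace Eigen;

    int main()
    {
      int n = 8;
      SparseMatrix<double> A(n,n);
      A.setIdentity();                                         // placeholder; use a real sparse A
      SparseQR<SparseMatrix<double>, COLAMDOrdering<int> > qr(A);
      SparseMatrix<double> Qs;
      Qs = qr.matrixQ();                                       // sparse destination (Sparse2Sparse)
      MatrixXd Qd;
      Qd = qr.matrixQ();                                       // dense destination (Sparse2Dense)
      return 0;
    }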
@@ -26,6 +26,8 @@ function(EigenDetermineVSServicePack _pack)
  set(_sp "vc110sp2")
  elseif(${_cl_version} VERSION_EQUAL "17.00.60610.1")
  set(_sp "vc110sp3")
+ else()
+ set(_sp ${CMAKE_CXX_COMPILER_VERSION})
  endif()
  endif()

@@ -367,7 +367,7 @@ macro(ei_get_compilerver VAR)
  # on all other system we rely on ${CMAKE_CXX_COMPILER}
  # supporting a "--version" or "/version" flag

- if(WIN32 AND NOT CYGWIN)
+ if(WIN32 AND NOT CYGWIN AND NOT MINGW)
  set(EIGEN_CXX_FLAG_VERSION "/version")
  else()
  set(EIGEN_CXX_FLAG_VERSION "--version")
@@ -13,17 +13,17 @@ The Eigen library is divided in a Core module and several additional modules. Ea

  <table class="manual">
  <tr><th>Module</th><th>Header file</th><th>Contents</th></tr>
- <tr><td>\link Core_Module Core \endlink</td><td>\code#include <Eigen/Core>\endcode</td><td>Matrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation</td></tr>
+ <tr ><td>\link Core_Module Core \endlink</td><td>\code#include <Eigen/Core>\endcode</td><td>Matrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation</td></tr>
  <tr class="alt"><td>\link Geometry_Module Geometry \endlink</td><td>\code#include <Eigen/Geometry>\endcode</td><td>Transform, Translation, Scaling, Rotation2D and 3D rotations (Quaternion, AngleAxis)</td></tr>
- <tr><td>\link LU_Module LU \endlink</td><td>\code#include <Eigen/LU>\endcode</td><td>Inverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)</td></tr>
+ <tr ><td>\link LU_Module LU \endlink</td><td>\code#include <Eigen/LU>\endcode</td><td>Inverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)</td></tr>
- <tr><td>\link Cholesky_Module Cholesky \endlink</td><td>\code#include <Eigen/Cholesky>\endcode</td><td>LLT and LDLT Cholesky factorization with solver</td></tr>
+ <tr class="alt"><td>\link Cholesky_Module Cholesky \endlink</td><td>\code#include <Eigen/Cholesky>\endcode</td><td>LLT and LDLT Cholesky factorization with solver</td></tr>
- <tr class="alt"><td>\link Householder_Module Householder \endlink</td><td>\code#include <Eigen/Householder>\endcode</td><td>Householder transformations; this module is used by several linear algebra modules</td></tr>
+ <tr ><td>\link Householder_Module Householder \endlink</td><td>\code#include <Eigen/Householder>\endcode</td><td>Householder transformations; this module is used by several linear algebra modules</td></tr>
- <tr><td>\link SVD_Module SVD \endlink</td><td>\code#include <Eigen/SVD>\endcode</td><td>SVD decomposition with least-squares solver (JacobiSVD)</td></tr>
+ <tr class="alt"><td>\link SVD_Module SVD \endlink</td><td>\code#include <Eigen/SVD>\endcode</td><td>SVD decompositions with least-squares solver (JacobiSVD, BDCSVD)</td></tr>
- <tr class="alt"><td>\link QR_Module QR \endlink</td><td>\code#include <Eigen/QR>\endcode</td><td>QR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)</td></tr>
+ <tr ><td>\link QR_Module QR \endlink</td><td>\code#include <Eigen/QR>\endcode</td><td>QR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)</td></tr>
- <tr><td>\link Eigenvalues_Module Eigenvalues \endlink</td><td>\code#include <Eigen/Eigenvalues>\endcode</td><td>Eigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)</td></tr>
+ <tr class="alt"><td>\link Eigenvalues_Module Eigenvalues \endlink</td><td>\code#include <Eigen/Eigenvalues>\endcode</td><td>Eigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)</td></tr>
- <tr class="alt"><td>\link Sparse_modules Sparse \endlink</td><td>\code#include <Eigen/Sparse>\endcode</td><td>%Sparse matrix storage and related basic linear algebra (SparseMatrix, DynamicSparseMatrix, SparseVector)</td></tr>
+ <tr ><td>\link Sparse_modules Sparse \endlink</td><td>\code#include <Eigen/Sparse>\endcode</td><td>%Sparse matrix storage and related basic linear algebra (SparseMatrix, SparseVector) \n (see \ref SparseQuickRefPage for details on sparse modules)</td></tr>
- <tr><td></td><td>\code#include <Eigen/Dense>\endcode</td><td>Includes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files</td></tr>
+ <tr class="alt"><td></td><td>\code#include <Eigen/Dense>\endcode</td><td>Includes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files</td></tr>
- <tr class="alt"><td></td><td>\code#include <Eigen/Eigen>\endcode</td><td>Includes %Dense and %Sparse header files (the whole Eigen library)</td></tr>
+ <tr ><td></td><td>\code#include <Eigen/Eigen>\endcode</td><td>Includes %Dense and %Sparse header files (the whole Eigen library)</td></tr>
  </table>

  <a href="#" class="top">top</a>
|
||||||
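As a reading aid for the module table, here is a tiny, self-contained program using the umbrella headers it mentions (a generic sketch, not part of the patch):

\code
#include <Eigen/Dense>   // Core, Geometry, LU, Cholesky, SVD, QR, Eigenvalues
#include <Eigen/Sparse>  // sparse storage and basic sparse linear algebra

int main()
{
  Eigen::Matrix3d M = Eigen::Matrix3d::Random();
  Eigen::Vector3d b(1.0, 2.0, 3.0);
  Eigen::Vector3d x = M.partialPivLu().solve(b);  // LU module, pulled in via <Eigen/Dense>
  return x.allFinite() ? 0 : 1;
}
\endcode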
@@ -364,32 +364,10 @@ vec3 = vec1.cross(vec2);\endcode</td></tr>
 
 <a href="#" class="top">top</a>
 \section QuickRef_Coeffwise Coefficient-wise \& Array operators
-Coefficient-wise operators for matrices and vectors:
-<table class="manual">
-<tr><th>Matrix API \matrixworld</th><th>Via Array conversions</th></tr>
-<tr><td>\code
-mat1.cwiseMin(mat2)
-mat1.cwiseMax(mat2)
-mat1.cwiseAbs2()
-mat1.cwiseAbs()
-mat1.cwiseSqrt()
-mat1.cwiseProduct(mat2)
-mat1.cwiseQuotient(mat2)\endcode
-</td><td>\code
-mat1.array().min(mat2.array())
-mat1.array().max(mat2.array())
-mat1.array().abs2()
-mat1.array().abs()
-mat1.array().sqrt()
-mat1.array() * mat2.array()
-mat1.array() / mat2.array()
-\endcode</td></tr>
-</table>
 
-It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with std::ptr_fun:
-\code mat1.unaryExpr(std::ptr_fun(foo))\endcode
-Array operators:\arrayworld
+In addition to the aforementioned operators, Eigen supports numerous coefficient-wise operators and functions.
+Most of them unambiguously make sense in array-world\arrayworld. The following operators are readily available for arrays,
+or available through .array() for vectors and matrices:
 
 <table class="manual">
 <tr><td>Arithmetic operators</td><td>\code
@@ -400,28 +378,107 @@ array1 + scalar  array1 - scalar  array1 += scalar  array1 -= scalar
 array1 < array2   array1 > array2   array1 < scalar   array1 > scalar
 array1 <= array2  array1 >= array2  array1 <= scalar  array1 >= scalar
 array1 == array2  array1 != array2  array1 == scalar  array1 != scalar
+array1.min(array2)  array1.max(array2)  array1.min(scalar)  array1.max(scalar)
 \endcode</td></tr>
-<tr><td>Trigo, power, and \n misc functions \n and the STL variants</td><td>\code
+<tr><td>Trigo, power, and \n misc functions \n and the STL-like variants</td><td>\code
-array1.min(array2)
-array1.max(array2)
 array1.abs2()
 array1.abs()    abs(array1)
 array1.sqrt()   sqrt(array1)
 array1.log()    log(array1)
+array1.log10()  log10(array1)
 array1.exp()    exp(array1)
-array1.pow(exponent)  pow(array1,exponent)
+array1.pow(array2)  pow(array1,array2)
+array1.pow(scalar)  pow(array1,scalar)
 array1.square()
 array1.cube()
 array1.inverse()
 
 array1.sin()    sin(array1)
 array1.cos()    cos(array1)
 array1.tan()    tan(array1)
 array1.asin()   asin(array1)
 array1.acos()   acos(array1)
+array1.atan()   atan(array1)
+array1.sinh()   sinh(array1)
+array1.cosh()   cosh(array1)
+array1.tanh()   tanh(array1)
+array1.arg()    arg(array1)
+
+array1.floor()  floor(array1)
+array1.ceil()   ceil(array1)
+array1.round()  round(array1)
+
+array1.isFinite()  isfinite(array1)
+array1.isInf()     isinf(array1)
+array1.isNaN()     isnan(array1)
 \endcode
 </td></tr>
 </table>
 
+The following coefficient-wise operators are available for all kinds of expressions (matrices, vectors, and arrays), and for both real and complex scalar types:
+
+<table class="manual">
+<tr><th>Eigen's API</th><th>STL-like APIs\arrayworld </th><th>Comments</th></tr>
+<tr><td>\code
+mat1.real()
+mat1.imag()
+mat1.conjugate()
+\endcode
+</td><td>\code
+real(array1)
+imag(array1)
+conj(array1)
+\endcode
+</td><td>
+\code
+// read-write, no-op for real expressions
+// read-only for real, read-write for complexes
+// no-op for real expressions
+\endcode
+</td></tr>
+</table>
+
+Some coefficient-wise operators are readily available for matrices and vectors through the following cwise* methods:
+<table class="manual">
+<tr><th>Matrix API \matrixworld</th><th>Via Array conversions</th></tr>
+<tr><td>\code
+mat1.cwiseMin(mat2)      mat1.cwiseMin(scalar)
+mat1.cwiseMax(mat2)      mat1.cwiseMax(scalar)
+mat1.cwiseAbs2()
+mat1.cwiseAbs()
+mat1.cwiseSqrt()
+mat1.cwiseInverse()
+mat1.cwiseProduct(mat2)
+mat1.cwiseQuotient(mat2)
+mat1.cwiseEqual(mat2)    mat1.cwiseEqual(scalar)
+mat1.cwiseNotEqual(mat2)
+\endcode
+</td><td>\code
+mat1.array().min(mat2.array())   mat1.array().min(scalar)
+mat1.array().max(mat2.array())   mat1.array().max(scalar)
+mat1.array().abs2()
+mat1.array().abs()
+mat1.array().sqrt()
+mat1.array().inverse()
+mat1.array() * mat2.array()
+mat1.array() / mat2.array()
+mat1.array() == mat2.array()     mat1.array() == scalar
+mat1.array() != mat2.array()
+\endcode</td></tr>
+</table>
+The main difference between the two APIs is that the one based on cwise* methods returns an expression in the matrix world,
+while the second one (based on .array()) returns an array expression.
+Recall that .array() has no cost: it only changes the available API and the interpretation of the data.
+
+It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with <a href="http://en.cppreference.com/w/cpp/utility/functional/ptr_fun">std::ptr_fun</a> (c++03), <a href="http://en.cppreference.com/w/cpp/utility/functional/ref">std::ref</a> (c++11), or <a href="http://en.cppreference.com/w/cpp/language/lambda">lambdas</a> (c++11):
+\code
+mat1.unaryExpr(std::ptr_fun(foo));
+mat1.unaryExpr(std::ref(foo));
+mat1.unaryExpr([](double x) { return foo(x); });
+\endcode
 
 <a href="#" class="top">top</a>
 \section QuickRef_Reductions Reductions
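To make the cwise*/.array() distinction described in the rewritten section above concrete, a small sketch (generic illustration, not taken from the patch):

\code
#include <Eigen/Dense>

void coeffwise_demo()
{
  Eigen::MatrixXd a = Eigen::MatrixXd::Random(3, 3), b = Eigen::MatrixXd::Random(3, 3);

  Eigen::MatrixXd p1 = a.cwiseProduct(b);                 // matrix-world expression
  Eigen::MatrixXd p2 = (a.array() * b.array()).matrix();  // array-world, converted back

  // User-defined coefficient-wise function via unaryExpr and a C++11 lambda.
  Eigen::MatrixXd clipped = a.unaryExpr([](double x) { return x < 0.0 ? 0.0 : x; });
  (void)p1; (void)p2; (void)clipped;
}
\endcode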
@@ -21,7 +21,7 @@ They are summarized in the following table:
 <tr><td>ConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Classic iterative CG</td><td>SPD</td><td>Preconditionning</td>
   <td>built-in, MPL2</td>
   <td>Recommended for large symmetric problems (e.g., 3D Poisson eq.)</td></tr>
-<tr><td>LSCG</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>CG for rectangular least-square problem</td><td>Rectangular</td><td>Preconditionning</td>
+<tr><td>LeastSquaresConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>CG for rectangular least-square problem</td><td>Rectangular</td><td>Preconditionning</td>
   <td>built-in, MPL2</td>
   <td>Solve for min |A'Ax-b|^2 without forming A'A</td></tr>
 <tr><td>BiCGSTAB</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Iterative stabilized bi-conjugate gradient</td><td>Square</td><td>Preconditionning</td>
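For the renamed LeastSquaresConjugateGradient entry, a minimal solve could look as follows (a sketch assuming a rectangular SparseMatrix<double> A and a matching right-hand side b):

\code
#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>

Eigen::VectorXd least_squares(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b)
{
  // Minimizes |Ax - b|^2 without explicitly forming A'A.
  Eigen::LeastSquaresConjugateGradient<Eigen::SparseMatrix<double> > lscg;
  lscg.compute(A);
  return lscg.solve(b);
}
\endcode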
@@ -22,8 +22,12 @@ n = Eigen::nbThreads( );
 You can disable Eigen's multi threading at compile time by defining the EIGEN_DONT_PARALLELIZE preprocessor token.
 
 Currently, the following algorithms can make use of multi-threading:
-  * general matrix - matrix products
-  * PartialPivLU
+  - general dense matrix - matrix products
+  - PartialPivLU
+  - row-major-sparse * dense vector/matrix products
+  - ConjugateGradient with \c Lower|Upper as the \c UpLo template parameter.
+  - BiCGSTAB with a row-major sparse matrix format.
+  - LeastSquaresConjugateGradient
 
 \section TopicMultiThreading_UsingEigenWithMT Using Eigen in a multi-threaded application
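A short sketch of the run-time thread controls this page refers to (generic illustration; the calls only have an effect in OpenMP-enabled builds):

\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::setNbThreads(4);  // cap Eigen's internal parallelism
  std::cout << "Eigen may use up to " << Eigen::nbThreads() << " threads\n";

  // Large dense matrix-matrix products are among the parallelized operations.
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(512, 512), B = Eigen::MatrixXd::Random(512, 512);
  Eigen::MatrixXd C = A * B;
  return C.size() != 0 ? 0 : 1;
}
\endcode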
@@ -125,7 +125,10 @@ template<typename ArrayType> void comparisons(const ArrayType& m)
   ArrayType m1 = ArrayType::Random(rows, cols),
             m2 = ArrayType::Random(rows, cols),
-            m3(rows, cols);
+            m3(rows, cols),
+            m4 = m1;
+
+  m4 = (m4.abs()==Scalar(0)).select(1,m4);
 
   VERIFY(((m1 + Scalar(1)) > m1).all());
   VERIFY(((m1 - Scalar(1)) < m1).all());
@@ -197,7 +200,10 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   ArrayType m1 = ArrayType::Random(rows, cols),
             m2 = ArrayType::Random(rows, cols),
-            m3(rows, cols);
+            m3(rows, cols),
+            m4 = m1;
+
+  m4 = (m4.abs()==Scalar(0)).select(1,m4);
 
   Scalar s1 = internal::random<Scalar>();
 
@@ -215,9 +221,9 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   VERIFY_IS_APPROX(m1.round(), round(m1));
   VERIFY_IS_APPROX(m1.floor(), floor(m1));
   VERIFY_IS_APPROX(m1.ceil(), ceil(m1));
-  VERIFY((m1.isNaN() == isnan(m1)).all());
-  VERIFY((m1.isInf() == isinf(m1)).all());
-  VERIFY((m1.isFinite() == isfinite(m1)).all());
+  VERIFY((m1.isNaN() == Eigen::isnan(m1)).all());
+  VERIFY((m1.isInf() == Eigen::isinf(m1)).all());
+  VERIFY((m1.isFinite() == Eigen::isfinite(m1)).all());
   VERIFY_IS_APPROX(m1.inverse(), inverse(m1));
   VERIFY_IS_APPROX(m1.abs(), abs(m1));
   VERIFY_IS_APPROX(m1.abs2(), abs2(m1));
@@ -243,9 +249,9 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
   VERIFY_IS_APPROX(arg(m1), ((ArrayType)(m1<0))*std::acos(-1.0));
   VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all());
-  VERIFY(isnan(m1*0.0/0.0).all());
-  VERIFY(isinf(m1/0.0).all());
-  VERIFY((isfinite(m1) && !isfinite(m1*0.0/0.0) && !isfinite(m1/0.0)).all());
+  VERIFY(Eigen::isnan((m1*0.0)/0.0).all());
+  VERIFY(Eigen::isinf(m4/0.0).all());
+  VERIFY((Eigen::isfinite(m1) && (!Eigen::isfinite(m1*0.0/0.0)) && (!Eigen::isfinite(m4/0.0))).all());
   VERIFY_IS_APPROX(inverse(inverse(m1)),m1);
   VERIFY((abs(m1) == m1 || abs(m1) == -m1).all());
   VERIFY_IS_APPROX(m3, sqrt(abs2(m1)));
@@ -299,7 +305,11 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
   Index cols = m.cols();
 
   ArrayType m1 = ArrayType::Random(rows, cols),
-            m2(rows, cols);
+            m2(rows, cols),
+            m4 = m1;
+
+  m4.real() = (m4.real().abs()==RealScalar(0)).select(RealScalar(1),m4.real());
+  m4.imag() = (m4.imag().abs()==RealScalar(0)).select(RealScalar(1),m4.imag());
 
   Array<RealScalar, -1, -1> m3(rows, cols);
 
@@ -317,9 +327,9 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
   VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
   VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
   VERIFY_IS_APPROX(m1.arg(), arg(m1));
-  VERIFY((m1.isNaN() == isnan(m1)).all());
-  VERIFY((m1.isInf() == isinf(m1)).all());
-  VERIFY((m1.isFinite() == isfinite(m1)).all());
+  VERIFY((m1.isNaN() == Eigen::isnan(m1)).all());
+  VERIFY((m1.isInf() == Eigen::isinf(m1)).all());
+  VERIFY((m1.isFinite() == Eigen::isfinite(m1)).all());
   VERIFY_IS_APPROX(m1.inverse(), inverse(m1));
   VERIFY_IS_APPROX(m1.log(), log(m1));
   VERIFY_IS_APPROX(m1.log10(), log10(m1));
@@ -345,20 +355,20 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
   VERIFY_IS_APPROX(arg(m1), m3);
 
   std::complex<RealScalar> zero(0.0,0.0);
-  VERIFY(isnan(m1*zero/zero).all());
+  VERIFY(Eigen::isnan(m1*zero/zero).all());
 #if EIGEN_COMP_CLANG
   // clang's complex division is notoriously broken
-  if(numext::isinf(m1(0,0)/Scalar(0))) {
+  if(numext::isinf(m4(0,0)/RealScalar(0))) {
 #endif
-    VERIFY(isinf(m1/zero).all());
+    VERIFY(Eigen::isinf(m4/zero).all());
 #if EIGEN_COMP_CLANG
   }
   else
   {
-    VERIFY(isinf(m1.real()/zero.real()).all());
+    VERIFY(Eigen::isinf(m4.real()/zero.real()).all());
   }
 #endif
-  VERIFY((isfinite(m1) && !isfinite(m1*zero/zero) && !isfinite(m1/zero)).all());
+  VERIFY((Eigen::isfinite(m1) && (!Eigen::isfinite(m1*zero/zero)) && (!Eigen::isfinite(m1/zero))).all());
 
   VERIFY_IS_APPROX(inverse(inverse(m1)),m1);
   VERIFY_IS_APPROX(conj(m1.conjugate()), m1);
@@ -223,7 +223,7 @@ void fixedSizeMatrixConstruction()
     for(int k=0; k<2; ++k) VERIFY(m2(k) == DenseIndex(raw[k]));
     for(int k=0; k<2; ++k) VERIFY(a2(k) == DenseIndex(raw[k]));
     for(int k=0; k<2; ++k) VERIFY(m3(k) == int(raw[k]));
-    for(int k=0; k<2; ++k) VERIFY(m4(k) == float(raw[k]));
+    for(int k=0; k<2; ++k) VERIFY((m4(k)) == Scalar(float(raw[k])));
   }
   {
     Matrix<Scalar,1,1> m(raw), m1(raw[0]), m2( (DenseIndex(raw[0])) ), m3( (int(raw[0])) );
@@ -69,8 +69,8 @@ void test_bdcsvd()
   CALL_SUBTEST_7(( svd_verify_assert<BDCSVD<MatrixXf> >(MatrixXf(10,12)) ));
   CALL_SUBTEST_8(( svd_verify_assert<BDCSVD<MatrixXcd> >(MatrixXcd(7,5)) ));
 
-  CALL_SUBTEST_1(( svd_all_trivial_2x2(bdcsvd<Matrix2cd>) ));
-  CALL_SUBTEST_1(( svd_all_trivial_2x2(bdcsvd<Matrix2d>) ));
+  CALL_SUBTEST_101(( svd_all_trivial_2x2(bdcsvd<Matrix2cd>) ));
+  CALL_SUBTEST_102(( svd_all_trivial_2x2(bdcsvd<Matrix2d>) ));
 
   for(int i = 0; i < g_repeat; i++) {
     CALL_SUBTEST_3(( bdcsvd<Matrix3f>() ));
@@ -104,8 +104,8 @@ void test_bdcsvd()
     CALL_SUBTEST_7( BDCSVD<MatrixXf>(10,10) );
 
     // Check that preallocation avoids subsequent mallocs
-    CALL_SUBTEST_9( svd_preallocate() );
+    CALL_SUBTEST_9( svd_preallocate<void>() );
 
-    CALL_SUBTEST_2( svd_underoverflow() );
+    CALL_SUBTEST_2( svd_underoverflow<void>() );
   }
 
@@ -114,7 +114,7 @@ void test_jacobisvd()
     CALL_SUBTEST_7( JacobiSVD<MatrixXf>(10,10) );
 
     // Check that preallocation avoids subsequent mallocs
-    CALL_SUBTEST_9( svd_preallocate() );
+    CALL_SUBTEST_9( svd_preallocate<void>() );
 
-    CALL_SUBTEST_2( svd_underoverflow() );
+    CALL_SUBTEST_2( svd_underoverflow<void>() );
   }
@@ -88,6 +88,10 @@ template<typename MatrixType> void real_complex(DenseIndex rows = MatrixType::Ro
   g_called = false;
   VERIFY_IS_APPROX(m1*s, m1*Scalar(s));
   VERIFY(g_called && "matrix<complex> * real not properly optimized");
+
+  g_called = false;
+  VERIFY_IS_APPROX(m1/s, m1/Scalar(s));
+  VERIFY(g_called && "matrix<complex> / real not properly optimized");
 }
 
 void test_linearstructure()
@@ -315,9 +315,29 @@ template<typename Scalar> void packetmath_real()
   CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasExp, std::exp, internal::pexp);
   {
     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
+    data1[1] = std::numeric_limits<Scalar>::epsilon();
     packet_helper<internal::packet_traits<Scalar>::HasExp,Packet> h;
     h.store(data2, internal::pexp(h.load(data1)));
     VERIFY(numext::isnan(data2[0]));
+    VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]);
+
+    data1[0] = -std::numeric_limits<Scalar>::epsilon();
+    data1[1] = 0;
+    h.store(data2, internal::pexp(h.load(data1)));
+    VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
+    VERIFY_IS_EQUAL(std::exp(0), data2[1]);
+
+    data1[0] = (std::numeric_limits<Scalar>::min)();
+    data1[1] = -(std::numeric_limits<Scalar>::min)();
+    h.store(data2, internal::pexp(h.load(data1)));
+    VERIFY_IS_EQUAL(std::exp((std::numeric_limits<Scalar>::min)()), data2[0]);
+    VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits<Scalar>::min)()), data2[1]);
+
+    data1[0] = std::numeric_limits<Scalar>::denorm_min();
+    data1[1] = -std::numeric_limits<Scalar>::denorm_min();
+    h.store(data2, internal::pexp(h.load(data1)));
+    VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
+    VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::denorm_min()), data2[1]);
   }
 
   for (int i=0; i<size; ++i)
@@ -331,12 +351,33 @@ template<typename Scalar> void packetmath_real()
   CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
   {
     data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
+    data1[1] = std::numeric_limits<Scalar>::epsilon();
     packet_helper<internal::packet_traits<Scalar>::HasLog,Packet> h;
     h.store(data2, internal::plog(h.load(data1)));
-    VERIFY(numext::isnan(data2[0]));
+    VERIFY(std::isnan(data2[0]));
+    // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]);
+
+    data1[0] = -std::numeric_limits<Scalar>::epsilon();
+    data1[1] = 0;
+    h.store(data2, internal::plog(h.load(data1)));
+    VERIFY(std::isnan(data2[0]));
+    // VERIFY_IS_EQUAL(std::log(0), data2[1]);
+
+    data1[0] = (std::numeric_limits<Scalar>::min)();
+    data1[1] = -(std::numeric_limits<Scalar>::min)();
+    h.store(data2, internal::plog(h.load(data1)));
+    VERIFY_IS_EQUAL(std::log((std::numeric_limits<Scalar>::min)()), data2[0]);
+    // VERIFY(std::isnan(data2[1]));
+
+    data1[0] = std::numeric_limits<Scalar>::denorm_min();
+    data1[1] = -std::numeric_limits<Scalar>::denorm_min();
+    h.store(data2, internal::plog(h.load(data1)));
+    // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
+    // VERIFY(std::isnan(data2[1]));
+
     data1[0] = -1.0f;
     h.store(data2, internal::plog(h.load(data1)));
-    VERIFY(numext::isnan(data2[0]));
+    VERIFY(std::isnan(data2[0]));
 #if !EIGEN_FAST_MATH
     h.store(data2, internal::psqrt(h.load(data1)));
     VERIFY(numext::isnan(data2[0]));
@@ -23,8 +23,8 @@ template<typename MatrixType> void qr()
   MatrixType m1;
   createRandomPIMatrixOfRank(rank,rows,cols,m1);
   ColPivHouseholderQR<MatrixType> qr(m1);
-  VERIFY(rank == qr.rank());
-  VERIFY(cols - qr.rank() == qr.dimensionOfKernel());
+  VERIFY_IS_EQUAL(rank, qr.rank());
+  VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel());
   VERIFY(!qr.isInjective());
   VERIFY(!qr.isInvertible());
   VERIFY(!qr.isSurjective());
@@ -51,11 +51,11 @@ template<typename MatrixType, int Cols2> void qr_fixedsize()
   Matrix<Scalar,Rows,Cols> m1;
   createRandomPIMatrixOfRank(rank,Rows,Cols,m1);
   ColPivHouseholderQR<Matrix<Scalar,Rows,Cols> > qr(m1);
-  VERIFY(rank == qr.rank());
-  VERIFY(Cols - qr.rank() == qr.dimensionOfKernel());
-  VERIFY(qr.isInjective() == (rank == Rows));
-  VERIFY(qr.isSurjective() == (rank == Cols));
-  VERIFY(qr.isInvertible() == (qr.isInjective() && qr.isSurjective()));
+  VERIFY_IS_EQUAL(rank, qr.rank());
+  VERIFY_IS_EQUAL(Cols - qr.rank(), qr.dimensionOfKernel());
+  VERIFY_IS_EQUAL(qr.isInjective(), (rank == Rows));
+  VERIFY_IS_EQUAL(qr.isSurjective(), (rank == Cols));
+  VERIFY_IS_EQUAL(qr.isInvertible(), (qr.isInjective() && qr.isSurjective()));
 
   Matrix<Scalar,Rows,Cols> r = qr.matrixQR().template triangularView<Upper>();
   Matrix<Scalar,Rows,Cols> c = qr.householderQ() * r * qr.colsPermutation().inverse();
@@ -23,8 +23,8 @@ template<typename MatrixType> void qr()
   MatrixType m1;
   createRandomPIMatrixOfRank(rank,rows,cols,m1);
   FullPivHouseholderQR<MatrixType> qr(m1);
-  VERIFY(rank == qr.rank());
-  VERIFY(cols - qr.rank() == qr.dimensionOfKernel());
+  VERIFY_IS_EQUAL(rank, qr.rank());
+  VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel());
   VERIFY(!qr.isInjective());
   VERIFY(!qr.isInvertible());
   VERIFY(!qr.isSurjective());
@@ -221,6 +221,12 @@ int test_ref_overload_fun1(Ref<MatrixXf> ) { return 3; }
 int test_ref_overload_fun2(Ref<const MatrixXd> ) { return 4; }
 int test_ref_overload_fun2(Ref<const MatrixXf> ) { return 5; }
 
+void test_ref_ambiguous(const Ref<const ArrayXd> &A, Ref<ArrayXd> B)
+{
+  B = A;
+  B = A - A;
+}
+
 // See also bug 969
 void test_ref_overloads()
 {
@@ -233,6 +239,9 @@ void test_ref_overloads()
   VERIFY( test_ref_overload_fun2(Ad)==4 );
   VERIFY( test_ref_overload_fun2(Ad+Bd)==4 );
   VERIFY( test_ref_overload_fun2(Af+Bf)==5 );
+
+  ArrayXd A, B;
+  test_ref_ambiguous(A, B);
 }
 
 void test_ref()
@@ -365,6 +365,20 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
     VERIFY_IS_APPROX(m2, refMat2);
   }
 
+  // test diagonal to sparse
+  {
+    DenseVector d = DenseVector::Random(rows);
+    DenseMatrix refMat2 = d.asDiagonal();
+    SparseMatrixType m2(rows, rows);
+    m2 = d.asDiagonal();
+    VERIFY_IS_APPROX(m2, refMat2);
+    SparseMatrixType m3(d.asDiagonal());
+    VERIFY_IS_APPROX(m3, refMat2);
+    refMat2 += d.asDiagonal();
+    m2 += d.asDiagonal();
+    VERIFY_IS_APPROX(m2, refMat2);
+  }
+
   // test conservative resize
   {
     std::vector< std::pair<StorageIndex,StorageIndex> > inc;
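The new test block above covers sparse assignment from a diagonal expression; in user code the same feature reads roughly as follows (a sketch, not part of the test suite):

\code
#include <Eigen/Dense>
#include <Eigen/Sparse>

Eigen::SparseMatrix<double> sparse_diagonal(const Eigen::VectorXd& d)
{
  Eigen::SparseMatrix<double> D(d.size(), d.size());
  D  = d.asDiagonal();   // build a sparse diagonal matrix from a vector
  D += d.asDiagonal();   // add a diagonal expression to an existing sparse matrix
  return D;
}
\endcode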
@@ -272,6 +272,7 @@ template<typename Solver> void check_sparse_spd_solving(Solver& solver, int maxS
       DenseVector b = it.rhs();
       DenseVector refX = it.refX();
       PermutationMatrix<Dynamic, Dynamic, StorageIndex> pnull;
+      halfA.resize(A.rows(), A.cols());
       if(Solver::UpLo == (Lower|Upper))
         halfA = A;
       else
@@ -89,6 +89,11 @@ template<typename Scalar> void test_sparseqr_scalar()
   QtQ = Q * Q.adjoint();
   idM.resize(Q.rows(), Q.rows()); idM.setIdentity();
   VERIFY(idM.isApprox(QtQ));
+
+  // Q to dense
+  DenseMat dQ;
+  dQ = solver.matrixQ();
+  VERIFY_IS_APPROX(Q, dQ);
 }
 void test_sparseqr()
 {
@@ -33,6 +33,7 @@ void svd_check_full(const MatrixType& m, const SvdType& svd)
   };
 
   typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
   typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
   typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
 
@@ -40,7 +41,10 @@ void svd_check_full(const MatrixType& m, const SvdType& svd)
   sigma.diagonal() = svd.singularValues().template cast<Scalar>();
   MatrixUType u = svd.matrixU();
   MatrixVType v = svd.matrixV();
-  VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
+  RealScalar scaling = m.cwiseAbs().maxCoeff();
+  if(scaling<=(std::numeric_limits<RealScalar>::min)())
+    scaling = RealScalar(1);
+  VERIFY_IS_APPROX(m/scaling, u * (sigma/scaling) * v.adjoint());
   VERIFY_IS_UNITARY(u);
   VERIFY_IS_UNITARY(v);
 }
@@ -307,6 +311,7 @@ void svd_inf_nan()
 
 // Regression test for bug 286: JacobiSVD loops indefinitely with some
 // matrices containing denormal numbers.
+template<typename>
 void svd_underoverflow()
 {
 #if defined __INTEL_COMPILER
@@ -384,6 +389,7 @@ void svd_all_trivial_2x2( void (*cb)(const MatrixType&,bool) )
   } while((id<int(value_set.size())).all());
 }
 
+template<typename>
 void svd_preallocate()
 {
   Vector3f v(3.f, 2.f, 1.f);
@@ -59,8 +59,10 @@
 #include "Eigen/Core"
 
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
@@ -80,6 +82,7 @@
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h"
@@ -88,6 +91,7 @@
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h"
@@ -23,7 +23,7 @@ template <typename T, size_t n> class array {
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
 
-  static const std::size_t size = n;
+  static const std::size_t size() { return n; }
 
   T values[n];
 
|
@ -375,6 +375,28 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
|||||||
resize(dims);
|
resize(dims);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef EIGEN_EMULATE_CXX11_META_H
|
||||||
|
template <typename std::ptrdiff_t... Indices>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
void resize(const Sizes<Indices...>& dimensions) {
|
||||||
|
array<Index, NumIndices> dims;
|
||||||
|
for (std::size_t i = 0; i < NumIndices; ++i) {
|
||||||
|
dims[i] = static_cast<Index>(dimensions[i]);
|
||||||
|
}
|
||||||
|
resize(dims);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) {
|
||||||
|
array<Index, NumIndices> dims;
|
||||||
|
for (std::size_t i = 0; i < NumIndices; ++i) {
|
||||||
|
dims[i] = static_cast<Index>(dimensions[i]);
|
||||||
|
}
|
||||||
|
resize(dims);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
bool checkIndexRange(const array<Index, NumIndices>& indices) const
|
bool checkIndexRange(const array<Index, NumIndices>& indices) const
|
||||||
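The resize overloads added above take a compile-time Sizes<> list; a possible usage sketch with the unsupported Tensor module and a C++11 compiler (the concrete sizes are hypothetical, illustration only):

\code
#include <unsupported/Eigen/CXX11/Tensor>

void fixed_size_resize()
{
  Eigen::Tensor<float, 3> t;
  t.resize(Eigen::Sizes<2, 3, 4>());  // dimensions known at compile time
  t.setZero();
}
\endcode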
@@ -108,6 +108,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return unaryExpr(internal::scalar_inverse_op<Scalar>());
     }
 
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived>
+    tanh() const {
+      return unaryExpr(internal::scalar_tanh_op<Scalar>());
+    }
+
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived>
     exp() const {
@@ -295,11 +301,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::SumReducer<CoeffReturnType>());
     }
 
-    const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
+    const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
     sum() const {
-      array<Index, NumDimensions> in_dims;
-      for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
-      return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::SumReducer<CoeffReturnType>());
+      DimensionList<Index, NumDimensions> in_dims;
+      return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::SumReducer<CoeffReturnType>());
     }
 
     template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -308,11 +313,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MeanReducer<CoeffReturnType>());
     }
 
-    const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
+    const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
     mean() const {
-      array<Index, NumDimensions> in_dims;
-      for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
-      return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MeanReducer<CoeffReturnType>());
+      DimensionList<Index, NumDimensions> in_dims;
+      return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MeanReducer<CoeffReturnType>());
     }
 
     template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -321,11 +325,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::ProdReducer<CoeffReturnType>());
     }
 
-    const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
+    const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
     prod() const {
-      array<Index, NumDimensions> in_dims;
-      for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
-      return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::ProdReducer<CoeffReturnType>());
+      DimensionList<Index, NumDimensions> in_dims;
+      return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::ProdReducer<CoeffReturnType>());
     }
 
     template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -334,11 +337,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MaxReducer<CoeffReturnType>());
     }
 
-    const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
+    const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
     maximum() const {
-      array<Index, NumDimensions> in_dims;
-      for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
-      return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MaxReducer<CoeffReturnType>());
+      DimensionList<Index, NumDimensions> in_dims;
+      return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MaxReducer<CoeffReturnType>());
    }
 
     template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -347,11 +349,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MinReducer<CoeffReturnType>());
     }
 
-    const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
+    const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
     minimum() const {
-      array<Index, NumDimensions> in_dims;
-      for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
-      return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>());
+      DimensionList<Index, NumDimensions> in_dims;
+      return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>());
     }
 
     template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
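The five hunks above only change the internal dimension-list type used by the full reductions; the user-facing calls are unchanged. For reference, a full reduction over all dimensions yields a rank-0 tensor (a generic sketch using the unsupported Tensor module):

\code
#include <unsupported/Eigen/CXX11/Tensor>

void full_reductions()
{
  Eigen::Tensor<float, 2> t(2, 3);
  t.setConstant(1.0f);

  Eigen::Tensor<float, 0> total = t.sum();   // 6
  Eigen::Tensor<float, 0> avg   = t.mean();  // 1
  float s = total();                         // rank-0 tensors read out via operator()
  (void)s; (void)avg;
}
\endcode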
@@ -413,6 +414,26 @@ class TensorBase<Derived, ReadOnlyAccessors>
                                     padding_type);
     }
 
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>
+    extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols,
+                           const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1,
+                           const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = 0) const {
+      return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value);
+    }
+
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>
+    extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols,
+                           const Index plane_stride, const Index row_stride, const Index col_stride,
+                           const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride,
+                           const Index padding_top_z, const Index padding_bottom_z,
+                           const Index padding_top, const Index padding_bottom,
+                           const Index padding_left, const Index padding_right, const Scalar padding_value = 0) const {
+      return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value);
+    }
+
     // Morphing operators.
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorLayoutSwapOp<const Derived>
@@ -460,6 +481,18 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return TensorStridingOp<const Strides, const Derived>(derived(), strides);
     }
 
+    // Added support for custom unary and binary operations
+    template <typename CustomUnaryFunc>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorCustomUnaryOp<const CustomUnaryFunc, const Derived> customOp(const CustomUnaryFunc& op) const {
+      return TensorCustomUnaryOp<const CustomUnaryFunc, const Derived>(derived(), op);
+    }
+    template <typename OtherDerived, typename CustomBinaryFunc>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived> customOp(const OtherDerived& other, const CustomBinaryFunc& op) const {
+      return TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived>(derived(), other, op);
+    }
+
     // Force the evaluation of the expression.
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const TensorForcedEvalOp<const Derived> eval() const {
@@ -106,8 +106,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
     {
       typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
       Assign assign(*this, other);
-      static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess;
-      internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }
 
@@ -117,8 +116,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
     {
       typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess;
-      internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }
 
|
@ -88,7 +88,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
|
|||||||
{
|
{
|
||||||
typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign;
|
typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign;
|
||||||
Assign assign(*this, other);
|
Assign assign(*this, other);
|
||||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,7 +98,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
|
|||||||
{
|
{
|
||||||
typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign;
|
typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign;
|
||||||
Assign assign(*this, other);
|
Assign assign(*this, other);
|
||||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -334,7 +334,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
     eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize());
 
     EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
-    PacketReturnType rslt = internal::pstore<PacketReturnType>(values, x);
+    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
     for (int i = 0; i < packetSize; ++i) {
       coeffRef(index+i) = values[i];
     }
@@ -364,14 +364,6 @@ class TensorContractionInputMapper<Scalar, Index, side, Tensor, nocontract_t, co
 };
 
-template <size_t n> struct max_n_1 {
-  static const size_t size = n;
-};
-template <> struct max_n_1<0> {
-  static const size_t size = 1;
-};
-
 
 template<typename Dimensions, typename LhsXprType, typename RhsXprType>
 struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >
 {
@@ -459,19 +451,6 @@ class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXp
 };
 
-template<bool cond> struct Cond {};
-
-template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-const T1& choose(Cond<true>, const T1& first, const T2&) {
-  return first;
-}
-
-template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-const T2& choose(Cond<false>, const T1&, const T2& second) {
-  return second;
-}
-
 
 template<typename Derived>
 struct TensorContractionEvaluatorBase
 {
@@ -508,13 +487,13 @@ struct TensorContractionEvaluatorBase
   static const int RDims =
       internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
   static const unsigned int ContractDims = internal::array_size<Indices>::value;
-  static const int NumDims = internal::max_n_1<LDims + RDims - 2 * ContractDims>::size;
+  static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
 
   typedef array<Index, LDims> left_dim_mapper_t;
   typedef array<Index, RDims> right_dim_mapper_t;
   typedef array<Index, ContractDims> contract_t;
-  typedef array<Index, internal::max_n_1<LDims - ContractDims>::size> left_nocontract_t;
-  typedef array<Index, internal::max_n_1<RDims - ContractDims>::size> right_nocontract_t;
+  typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
+  typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
 
   typedef DSizes<Index, NumDims> Dimensions;
 
@ -869,10 +848,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
|||||||
typedef array<Index, RDims> right_dim_mapper_t;
|
typedef array<Index, RDims> right_dim_mapper_t;
|
||||||
|
|
||||||
typedef array<Index, ContractDims> contract_t;
|
typedef array<Index, ContractDims> contract_t;
|
||||||
typedef array<Index, internal::max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||||
typedef array<Index, internal::max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||||
|
|
||||||
static const int NumDims = internal::max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||||
|
|
||||||
// Could we use NumDimensions here?
|
// Could we use NumDimensions here?
|
||||||
typedef DSizes<Index, NumDims> Dimensions;
|
typedef DSizes<Index, NumDims> Dimensions;
|
||||||
|
@ -1241,10 +1241,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
|||||||
typedef array<Index, RDims> right_dim_mapper_t;
|
typedef array<Index, RDims> right_dim_mapper_t;
|
||||||
|
|
||||||
typedef array<Index, ContractDims> contract_t;
|
typedef array<Index, ContractDims> contract_t;
|
||||||
typedef array<Index, internal::max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||||
typedef array<Index, internal::max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||||
|
|
||||||
static const int NumDims = internal::max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||||
|
|
||||||
typedef DSizes<Index, NumDims> Dimensions;
|
typedef DSizes<Index, NumDims> Dimensions;
|
||||||
|
|
||||||
|
@ -93,10 +93,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
|||||||
typedef array<Index, RDims> right_dim_mapper_t;
|
typedef array<Index, RDims> right_dim_mapper_t;
|
||||||
|
|
||||||
typedef array<Index, ContractDims> contract_t;
|
typedef array<Index, ContractDims> contract_t;
|
||||||
typedef array<Index, internal::max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||||
typedef array<Index, internal::max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||||
|
|
||||||
static const int NumDims = internal::max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||||
|
|
||||||
typedef DSizes<Index, NumDims> Dimensions;
|
typedef DSizes<Index, NumDims> Dimensions;
|
||||||
|
|
||||||
|
@ -510,7 +510,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
|
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
|
||||||
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
||||||
EvalTo evalToTmp(local, m_kernelArg);
|
EvalTo evalToTmp(local, m_kernelArg);
|
||||||
internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<KernelArgType, Device>::PacketAccess>::run(evalToTmp, m_device);
|
const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
|
||||||
|
internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
|
||||||
|
|
||||||
m_kernel = local;
|
m_kernel = local;
|
||||||
m_local_kernel = true;
|
m_local_kernel = true;
|
||||||
@ -815,7 +816,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
|
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
|
||||||
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
||||||
EvalTo evalToTmp(local, m_kernelArg);
|
EvalTo evalToTmp(local, m_kernelArg);
|
||||||
internal::TensorExecutor<const EvalTo, GpuDevice, TensorEvaluator<KernelArgType, GpuDevice>::PacketAccess>::run(evalToTmp, m_device);
|
const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
|
||||||
|
internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);
|
||||||
|
|
||||||
m_kernel = local;
|
m_kernel = local;
|
||||||
m_local_kernel = true;
|
m_local_kernel = true;
|
||||||
|
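The two convolution hunks above stop reading TensorEvaluator<...>::PacketAccess directly and instead consult the internal::IsVectorizable trait that this commit moves into the forward declarations (see the TensorForwardDeclarations.h hunk further down); on GpuDevice that trait additionally requires IsAligned. A minimal sketch of the same kernel pre-evaluation pattern for a hypothetical expression type Expr and device Dev (the helper name eval_into is illustrative, not part of the commit):

template <typename Expr, typename Dev>
void eval_into(typename Expr::Scalar* buffer, const Expr& expr, const Dev& device) {
  typedef Eigen::TensorEvalToOp<const Expr> EvalTo;          // writes the result of 'expr' into 'buffer'
  EvalTo evalToTmp(buffer, expr);
  const bool Vectorize = Eigen::internal::IsVectorizable<Dev, Expr>::value;
  Eigen::internal::TensorExecutor<const EvalTo, Dev, Vectorize>::run(evalToTmp, device);
}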
unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h (new file, 310 lines)
@ -0,0 +1,310 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H

namespace Eigen {

/** \class TensorCustomUnaryOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor custom class.
  *
  */
namespace internal {
template<typename CustomUnaryFunc, typename XprType>
struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
{
  typedef typename XprType::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename XprType::StorageKind StorageKind;
  typedef typename XprType::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = traits<XprType>::NumDimensions;
  static const int Layout = traits<XprType>::Layout;
};

template<typename CustomUnaryFunc, typename XprType>
struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
{
  typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type;
};

template<typename CustomUnaryFunc, typename XprType>
struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
{
  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type;
};

} // end namespace internal


template<typename CustomUnaryFunc, typename XprType>
class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar;
    typedef typename internal::traits<TensorCustomUnaryOp>::Packet Packet;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename XprType::PacketReturnType PacketReturnType;
    typedef typename internal::nested<TensorCustomUnaryOp>::type Nested;
    typedef typename internal::traits<TensorCustomUnaryOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorCustomUnaryOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func)
        : m_expr(expr), m_func(func) {}

    EIGEN_DEVICE_FUNC
    const CustomUnaryFunc& func() const { return m_func; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_expr; }

  protected:
    typename XprType::Nested m_expr;
    const CustomUnaryFunc m_func;
};


// Eval as rvalue
template<typename CustomUnaryFunc, typename XprType, typename Device>
struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device>
{
  typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType;
  typedef typename internal::traits<ArgType>::Index Index;
  static const int NumDims = internal::traits<ArgType>::NumDimensions;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef
      typename internal::remove_const<typename ArgType::Scalar>::type Scalar;

  enum {
    IsAligned = false,
    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
    BlockAccess = false,
    Layout = TensorEvaluator<XprType, Device>::Layout,
    CoordAccess = false,  // to be implemented
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)
      : m_op(op), m_device(device), m_result(NULL)
  {
    m_dimensions = op.func().dimensions(op.expression());
  }

  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
  typedef typename XprType::PacketReturnType PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    if (data) {
      evalTo(data);
      return false;
    } else {
      m_result = static_cast<CoeffReturnType*>(
          m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
      evalTo(m_result);
      return true;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    if (m_result != NULL) {
      m_device.deallocate(m_result);
      m_result = NULL;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_result[index];
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }

 protected:
  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
    TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(
        data, m_dimensions);
    m_op.func().eval(m_op.expression(), result, m_device);
  }

  Dimensions m_dimensions;
  const ArgType m_op;
  const Device& m_device;
  CoeffReturnType* m_result;
};


/** \class TensorCustomBinaryOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor custom class.
  *
  */
namespace internal {
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
{
  typedef typename internal::promote_storage_type<typename LhsXprType::Scalar,
                                                  typename RhsXprType::Scalar>::ret Scalar;
  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
                                                  typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
  typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType,
                                                  typename RhsXprType::PacketReturnType>::ret PacketReturnType;
  typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
                                        typename traits<RhsXprType>::StorageKind>::ret StorageKind;
  typedef typename promote_index_type<typename traits<LhsXprType>::Index,
                                      typename traits<RhsXprType>::Index>::type Index;
  typedef typename LhsXprType::Nested LhsNested;
  typedef typename RhsXprType::Nested RhsNested;
  typedef typename remove_reference<LhsNested>::type _LhsNested;
  typedef typename remove_reference<RhsNested>::type _RhsNested;
  static const int NumDimensions = traits<LhsXprType>::NumDimensions;
  static const int Layout = traits<LhsXprType>::Layout;
};

template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense>
{
  typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type;
};

template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
{
  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type;
};

} // end namespace internal


template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors>
{
  public:
    typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar;
    typedef typename internal::traits<TensorCustomBinaryOp>::Packet Packet;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename internal::traits<TensorCustomBinaryOp>::CoeffReturnType CoeffReturnType;
    typedef typename internal::traits<TensorCustomBinaryOp>::PacketReturnType PacketReturnType;
    typedef typename internal::nested<TensorCustomBinaryOp>::type Nested;
    typedef typename internal::traits<TensorCustomBinaryOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorCustomBinaryOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func)
        : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {}

    EIGEN_DEVICE_FUNC
    const CustomBinaryFunc& func() const { return m_func; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename LhsXprType::Nested>::type&
    lhsExpression() const { return m_lhs_xpr; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename RhsXprType::Nested>::type&
    rhsExpression() const { return m_rhs_xpr; }

  protected:
    typename LhsXprType::Nested m_lhs_xpr;
    typename RhsXprType::Nested m_rhs_xpr;
    const CustomBinaryFunc m_func;
};


// Eval as rvalue
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device>
struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device>
{
  typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType;
  typedef typename internal::traits<XprType>::Index Index;
  static const int NumDims = internal::traits<XprType>::NumDimensions;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;

  enum {
    IsAligned = false,
    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
    BlockAccess = false,
    Layout = TensorEvaluator<LhsXprType, Device>::Layout,
    CoordAccess = false,  // to be implemented
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_op(op), m_device(device), m_result(NULL)
  {
    m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression());
  }

  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
  typedef typename XprType::PacketReturnType PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    if (data) {
      evalTo(data);
      return false;
    } else {
      m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
      evalTo(m_result);
      return true;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    if (m_result != NULL) {
      m_device.deallocate(m_result);
      m_result = NULL;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_result[index];
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }

 protected:
  EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
    TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions);
    m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
  }

  Dimensions m_dimensions;
  const XprType m_op;
  const Device& m_device;
  CoeffReturnType* m_result;
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
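A custom functor only needs the two members that the evaluators above call: func().dimensions(expression) to report the output shape and func().eval(expression, result, device) to fill the result. A minimal sketch for a rank-2 input (the functor name and body are illustrative, not part of the commit):

struct DoubleValuesFunc {
  // Output shape: identical to the input tensor's shape.
  template <typename Input>
  Eigen::DSizes<Eigen::DenseIndex, 2> dimensions(const Input& input) const {
    return input.dimensions();
  }
  // Fill the preallocated output map on the given device.
  template <typename Input, typename Output, typename Device>
  void eval(const Input& input, Output& output, const Device& device) const {
    output.device(device) = input + input;
  }
};

// Usage: Eigen::TensorCustomUnaryOp<DoubleValuesFunc, decltype(tensor)> op(tensor, DoubleValuesFunc());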
@ -28,8 +28,25 @@ struct DefaultDevice {
     ::memset(buffer, c, n);
   }

-  EIGEN_STRONG_INLINE size_t numThreads() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
+#ifndef __CUDA_ARCH__
+    // Running on the host CPU
     return 1;
+#else
+    // Running on a CUDA device
+    return 32;
+#endif
   }

+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
+#ifndef __CUDA_ARCH__
+    // Running single threaded on the host CPU
+    // Should return an enum that encodes the ISA supported by the CPU
+    return 1;
+#else
+    // Running on a CUDA device
+    return __CUDA_ARCH__ / 100;
+#endif
   }
 };

@ -38,10 +55,19 @@ struct DefaultDevice {
 // We should really use a thread pool here but first we need to find a portable thread pool library.
 #ifdef EIGEN_USE_THREADS

+// This defines an interface that ThreadPoolDevice can take to use
+// custom thread pools underneath.
+class ThreadPoolInterface {
+ public:
+  virtual void Schedule(std::function<void()> fn) = 0;
+
+  virtual ~ThreadPoolInterface() {}
+};
+
 // The implementation of the ThreadPool type ensures that the Schedule method
 // runs the functions it is provided in FIFO order when the scheduling is done
 // by a single thread.
-class ThreadPool {
+class ThreadPool : public ThreadPoolInterface {
  public:
  // Construct a pool that contains "num_threads" threads.
  explicit ThreadPool(int num_threads) {
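Any scheduler can now sit behind ThreadPoolDevice through this interface. A minimal sketch of an adapter built on std::thread (illustrative only; a real pool would reuse worker threads), assuming the interface lives in the Eigen namespace like the surrounding code:

#include <functional>
#include <thread>

class DetachedThreadPool : public Eigen::ThreadPoolInterface {
 public:
  // Run each scheduled closure on its own detached thread.
  virtual void Schedule(std::function<void()> fn) { std::thread(std::move(fn)).detach(); }
  virtual ~DetachedThreadPool() {}
};

// DetachedThreadPool pool;
// Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/4);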
@ -182,7 +208,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) {

 // Build a thread pool device on top the an existing pool of threads.
 struct ThreadPoolDevice {
-  ThreadPoolDevice(ThreadPool* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
+  ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }

   EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
     return internal::aligned_malloc(num_bytes);

@ -204,6 +230,11 @@ struct ThreadPoolDevice {
     return num_threads_;
   }

+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
+    // Should return an enum that encodes the ISA supported by the CPU
+    return 1;
+  }
+
   template <class Function, class... Args>
   EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const {
     Notification* n = new Notification();

@ -219,7 +250,7 @@ struct ThreadPoolDevice {
   }

  private:
-  ThreadPool* pool_;
+  ThreadPoolInterface* pool_;
   size_t num_threads_;
 };

@ -260,9 +291,12 @@ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
   assert(status == cudaSuccess);
 }

+// Cuda stream to use when no stream is specified explicitely.
+static const cudaStream_t default_stream = cudaStreamDefault;
+
 struct GpuDevice {
   // The cudastream is not owned: the caller is responsible for its initialization and eventual destruction.
-  GpuDevice(const cudaStream_t* stream) : stream_(stream) { eigen_assert(stream); }
+  GpuDevice(const cudaStream_t* stream = &default_stream) : stream_(stream) { eigen_assert(stream); }

   EIGEN_STRONG_INLINE const cudaStream_t& stream() const { return *stream_; }

@ -308,6 +342,8 @@ struct GpuDevice {
     return 32;
   }

+  inline int majorDeviceVersion() const { return m_deviceProperties.major; }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
     cudaStreamSynchronize(*stream_);
   }
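With the defaulted constructor argument a GpuDevice can now be created without explicitly managing a cudaStream_t; a short sketch (assumes a CUDA build):

Eigen::GpuDevice gpu;                        // bound to the default CUDA stream
const int arch = gpu.majorDeviceVersion();   // e.g. 3 on a compute capability 3.5 device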
unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h (new file, 235 lines)
@ -0,0 +1,235 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||||
|
//
|
||||||
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
|
||||||
|
#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
/** \internal
|
||||||
|
*
|
||||||
|
* \class TensorDimensionList
|
||||||
|
* \ingroup CXX11_Tensor_Module
|
||||||
|
*
|
||||||
|
* \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n.
|
||||||
|
*
|
||||||
|
* \sa Tensor
|
||||||
|
*/
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank> struct DimensionList {
|
||||||
|
const Index operator[] (const Index i) const { return i; }
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
template<typename Index, std::size_t Rank> struct array_size<DimensionList<Index, Rank> > {
|
||||||
|
static const size_t value = Rank;
|
||||||
|
};
|
||||||
|
template<typename Index, std::size_t Rank> struct array_size<const DimensionList<Index, Rank> > {
|
||||||
|
static const size_t value = Rank;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(DimensionList<Index, Rank>&) {
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(const DimensionList<Index, Rank>&) {
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(EIGEN_HAS_CONSTEXPR)
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_known_statically<DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex) const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_known_statically<const DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex) const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct all_indices_known_statically<DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() () const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct all_indices_known_statically<const DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() () const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct indices_statically_known_to_increase<DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() () const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct indices_statically_known_to_increase<const DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() () const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_eq<DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||||
|
return i == value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_eq<const DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||||
|
return i == value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_ne<DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||||
|
return i != value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_ne<const DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||||
|
return i != value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_gt<DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||||
|
return i > value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_gt<const DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||||
|
return i > value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_lt<DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||||
|
return i < value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_lt<const DimensionList<Index, Rank> > {
|
||||||
|
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||||
|
return i < value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#else
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_known_statically<DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_known_statically<const DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct all_indices_known_statically<DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() () const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct all_indices_known_statically<const DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() () const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct indices_statically_known_to_increase<DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() () const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct indices_statically_known_to_increase<const DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() () const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_eq<DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_eq<const DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_ne<DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_ne<const DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_gt<DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_gt<const DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_lt<DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <typename Index, std::size_t Rank>
|
||||||
|
struct index_statically_lt<const DimensionList<Index, Rank> > {
|
||||||
|
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
|
||||||
|
#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
|
@ -69,6 +69,31 @@ struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMaj
   }
 };

+template<typename Index, std::size_t n>
+struct fixed_size_tensor_index_extraction_helper
+{
+  template <typename Dimensions> EIGEN_DEVICE_FUNC
+  static inline Index run(const Index index,
+                          const Dimensions& dimensions)
+  {
+    const Index mult = (index == n) ? 1 : 0;
+    return array_get<n>(dimensions) * mult +
+        fixed_size_tensor_index_extraction_helper<Index, n - 1>::run(index, dimensions);
+  }
+};
+
+template<typename Index>
+struct fixed_size_tensor_index_extraction_helper<Index, 0>
+{
+  template <typename Dimensions> EIGEN_DEVICE_FUNC
+  static inline Index run(const Index index,
+                          const Dimensions& dimensions)
+  {
+    const Index mult = (index == 0) ? 1 : 0;
+    return array_get<0>(dimensions) * mult;
+  }
+};
+
 } // end namespace internal

@ -99,6 +124,10 @@ struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> {
   }
 #endif

+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const int index) const {
+    return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count - 1>::run(index, *this);
+  }
+
   template <typename T> Sizes& operator = (const T& /*other*/) {
     // add assertion failure if the size of other is different
     return *this;

@ -114,10 +143,12 @@ struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> {
   }
 };

+namespace internal {
 template <typename std::ptrdiff_t... Indices>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indices...>&) {
   return Sizes<Indices...>::total_size;
 }
+}

 #else

@ -166,6 +197,24 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
   }
 #endif

+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex operator[] (const int index) const {
+    switch (index) {
+      case 0:
+        return internal::get<0, Base>::value;
+      case 1:
+        return internal::get<1, Base>::value;
+      case 2:
+        return internal::get<2, Base>::value;
+      case 3:
+        return internal::get<3, Base>::value;
+      case 4:
+        return internal::get<4, Base>::value;
+      default:
+        eigen_assert(false && "index overflow");
+        return static_cast<std::size_t>(-1);
+    }
+  }
+
   template <typename T> Sizes& operator = (const T&) {
     // to do: check the size of other
     return *this;

@ -181,10 +230,12 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
   }
 };

+namespace internal {
 template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
   return Sizes<V1, V2, V3, V4, V5>::total_size;
 }
+}

 #endif

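The added operator[] lets the purely compile-time Sizes<> dimension list be indexed with a runtime value, which generic evaluator code can rely on regardless of whether the sizes are static or dynamic. A short sketch (C++11 code path):

Eigen::Sizes<2, 3, 4> dims;
// dims[0] == 2, dims[1] == 3, dims[2] == 4; any other index hits eigen_assert("index overflow").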
@ -113,9 +113,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>

   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) {
-    m_impl.evalSubExprsIfNeeded(NULL);
+    eigen_assert(scalar == NULL);
-    return true;
+    return m_impl.evalSubExprsIfNeeded(m_buffer);
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
@ -22,13 +22,8 @@ namespace Eigen {
 */
 namespace internal {

-template <typename Device, typename Expression>
-struct IsVectorizable {
-  static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
-};
-
 // Default strategy: the expression is evaluated with a single cpu thread.
-template<typename Expression, typename Device = DefaultDevice, bool Vectorizable = IsVectorizable<Device, Expression>::value>
+template<typename Expression, typename Device, bool Vectorizable>
 class TensorExecutor
 {
  public:

@ -198,10 +193,6 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
   }
 }

-template <typename Expression>
-struct IsVectorizable<GpuDevice, Expression> {
-  static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && TensorEvaluator<Expression, GpuDevice>::IsAligned;
-};
-
 template<typename Expression>
 class TensorExecutor<Expression, GpuDevice, false>
@ -116,7 +116,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     }
     typedef TensorEvalToOp<const ArgType> EvalTo;
     EvalTo evalToTmp(m_buffer, m_op);
-    internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<ArgType, Device>::PacketAccess>::run(evalToTmp, m_device);
+    const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value;
+    internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
     m_impl.cleanup();
     return true;
   }
@ -29,6 +29,7 @@ template<typename TargetType, typename XprType> class TensorConversionOp;
 template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
 template<typename PatchDim, typename XprType> class TensorPatchOp;
 template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp;
+template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorVolumePatchOp;
 template<typename Broadcast, typename XprType> class TensorBroadcastingOp;
 template<DenseIndex DimId, typename XprType> class TensorChippingOp;
 template<typename NewDimensions, typename XprType> class TensorReshapingOp;

@ -41,14 +42,36 @@ template<typename Strides, typename XprType> class TensorStridingOp;
 template<typename Generator, typename XprType> class TensorGeneratorOp;
 template<typename LeftXprType, typename RightXprType> class TensorAssignOp;

+template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
+template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
+
 template<typename XprType> class TensorEvalToOp;
 template<typename XprType> class TensorForcedEvalOp;

 template<typename ExpressionType, typename DeviceType> class TensorDevice;
 template<typename Derived, typename Device> struct TensorEvaluator;

+class DefaultDevice;
+class ThreadPoolDevice;
+class GpuDevice;
+
 namespace internal {
-template<typename Expression, typename Device, bool Vectorizable> class TensorExecutor;
+
+template <typename Device, typename Expression>
+struct IsVectorizable {
+  static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
+};
+
+template <typename Expression>
+struct IsVectorizable<GpuDevice, Expression> {
+  static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess &&
+                            TensorEvaluator<Expression, GpuDevice>::IsAligned;
+};
+
+template <typename Expression, typename Device,
+          bool Vectorizable = IsVectorizable<Device, Expression>::value>
+class TensorExecutor;
+
 } // end namespace internal

 } // end namespace Eigen
@ -17,6 +17,7 @@ namespace internal {
 template <typename T> struct SumReducer
 {
   static const bool PacketAccess = true;
+  static const bool IsStateful = false;

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
     (*accum) += t;

@ -49,6 +50,8 @@ template <typename T> struct SumReducer
 template <typename T> struct MeanReducer
 {
   static const bool PacketAccess = true;
+  static const bool IsStateful = true;
+
   MeanReducer() : scalarCount_(0), packetCount_(0) { }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {

@ -88,6 +91,7 @@ template <typename T> struct MeanReducer
 template <typename T> struct MaxReducer
 {
   static const bool PacketAccess = true;
+  static const bool IsStateful = false;

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
     if (t > *accum) { *accum = t; }

@ -120,6 +124,7 @@ template <typename T> struct MaxReducer
 template <typename T> struct MinReducer
 {
   static const bool PacketAccess = true;
+  static const bool IsStateful = false;

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
     if (t < *accum) { *accum = t; }

@ -153,6 +158,7 @@ template <typename T> struct MinReducer
 template <typename T> struct ProdReducer
 {
   static const bool PacketAccess = true;
+  static const bool IsStateful = false;

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
     (*accum) *= t;
@ -90,7 +90,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
     {
       typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@ -100,7 +100,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
     {
       typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h (new file, 36 lines)
@ -0,0 +1,36 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H
#define EIGEN_CXX11_TENSOR_TENSOR_META_H

namespace Eigen {

template<bool cond> struct Cond {};

template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
const T1& choose(Cond<true>, const T1& first, const T2&) {
  return first;
}

template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
const T2& choose(Cond<false>, const T1&, const T2& second) {
  return second;
}

template <size_t n> struct max_n_1 {
  static const size_t size = n;
};
template <> struct max_n_1<0> {
  static const size_t size = 1;
};

} // namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H
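TensorMeta.h simply centralizes helpers that previously lived next to the contraction code: max_n_1<n>::size clamps a dimension count to at least one, and choose() selects between two values based on a compile-time condition. A small illustration (not part of the file):

static_assert(Eigen::max_n_1<5>::size == 5, "non-zero sizes are preserved");
static_assert(Eigen::max_n_1<0>::size == 1, "a rank-0 result still gets one coefficient");
// choose(Eigen::Cond<true>(), a, b) yields a; choose(Eigen::Cond<false>(), a, b) yields b.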
@ -78,7 +78,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
     {
       typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@ -88,7 +88,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
     {
       typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@ -262,7 +262,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
     {
       typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@ -271,7 +271,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
     {
       typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@ -411,7 +411,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
   {
     const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
     EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index+packetSize-1 < dimensions().TotalSize());
+    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

     Index inputIndices[] = {0, 0};
     Index indices[] = {index, index + packetSize - 1};
||||||
|
@ -44,6 +44,38 @@ struct nested<TensorReductionOp<Op, Dims, XprType>, 1, typename eval<TensorReduc
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename OutputDims> struct DimInitializer {
|
||||||
|
template <typename InputDims, typename ReducedDims> EIGEN_DEVICE_FUNC
|
||||||
|
static void run(const InputDims& input_dims,
|
||||||
|
const array<bool, internal::array_size<InputDims>::value>& reduced,
|
||||||
|
OutputDims* output_dims, ReducedDims* reduced_dims) {
|
||||||
|
const int NumInputDims = internal::array_size<InputDims>::value;
|
||||||
|
int outputIndex = 0;
|
||||||
|
int reduceIndex = 0;
|
||||||
|
for (int i = 0; i < NumInputDims; ++i) {
|
||||||
|
if (reduced[i]) {
|
||||||
|
(*reduced_dims)[reduceIndex] = input_dims[i];
|
||||||
|
++reduceIndex;
|
||||||
|
} else {
|
||||||
|
(*output_dims)[outputIndex] = input_dims[i];
|
||||||
|
++outputIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct DimInitializer<Sizes<1> > {
|
||||||
|
template <typename InputDims, typename Index, size_t Rank> EIGEN_DEVICE_FUNC
|
||||||
|
static void run(const InputDims& input_dims, const array<bool, Rank>&,
|
||||||
|
Sizes<1>*, array<Index, Rank>* reduced_dims) {
|
||||||
|
const int NumInputDims = internal::array_size<InputDims>::value;
|
||||||
|
for (int i = 0; i < NumInputDims; ++i) {
|
||||||
|
(*reduced_dims)[i] = input_dims[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename ReducedDims, int NumTensorDims, int Layout>
|
template <typename ReducedDims, int NumTensorDims, int Layout>
|
||||||
struct are_inner_most_dims {
|
struct are_inner_most_dims {
|
||||||
static const bool value = false;
|
static const bool value = false;
|
||||||
@ -144,7 +176,7 @@ template <int DimIndex, typename Self, typename Op>
 struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
     EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
+    for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
       const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
       InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
     }
|
||||||
@@ -154,13 +186,325 @@ struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
 template <typename Self, typename Op>
 struct InnerMostDimPreserver<0, Self, Op, true> {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
-    for (int j = 0; j < self.m_reducedDims[0]; ++j) {
+    for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) {
       const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0];
       reducer.reducePacket(self.m_impl.template packet<Unaligned>(input), accum);
     }
   }
 };

+// Default full reducer
+template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
+struct FullReducer {
+  static const bool HasOptimizedImplementation = false;
+
+  static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) {
+    const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions());
+    *output = InnerMostDimReducer<Self, Op>::reduce(self, 0, num_coeffs, reducer);
+  }
+};
+
+
+#ifdef EIGEN_USE_THREADS
+// Multithreaded full reducers
+template <typename Eval, typename Op, bool Vectorizable = (Eval::InputPacketAccess & Op::PacketAccess)>
+struct FullReducerShard {
+  static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) {
+
+    shard->saccum = reducer.initialize();
+    for (typename Eval::Index j = 0; j < numValuesToReduce; ++j) {
+      reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum);
+    }
+  }
+
+  typename Eval::CoeffReturnType saccum;
+};
+
+template <typename Eval, typename Op>
+struct FullReducerShard<Eval, Op, true> {
+  static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) {
+
+    const int packetSize = internal::unpacket_traits<typename Eval::PacketReturnType>::size;
+    const typename Eval::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
+
+    shard->paccum = reducer.template initializePacket<typename Eval::PacketReturnType>();
+    for (typename Eval::Index j = 0; j < VectorizedSize; j += packetSize) {
+      reducer.reducePacket(eval.m_impl.template packet<Unaligned>(firstIndex + j), &shard->paccum);
+    }
+    shard->saccum = reducer.initialize();
+    for (typename Eval::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
+      reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum);
+    }
+  }
+
+  typename Eval::PacketReturnType paccum;
+  typename Eval::CoeffReturnType saccum;
+};
+
+
+template <typename Self, typename Op>
+struct FullReducer<Self, Op, ThreadPoolDevice, false> {
+  static const bool HasOptimizedImplementation = !Op::IsStateful;
+
+  // launch one reducer per thread and accumulate the result.
+  static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) {
+    typedef typename Self::Index Index;
+    const Index num_coeffs = array_prod(self.m_impl.dimensions());
+    const Index blocksize = std::floor<Index>(static_cast<float>(num_coeffs)/device.numThreads());
+    const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
+    eigen_assert(num_coeffs >= numblocks * blocksize);
+
+    std::vector<Notification*> results;
+    results.reserve(numblocks);
+    std::vector<FullReducerShard<Self, Op, false> > shards;
+    shards.resize(numblocks);
+    for (Index i = 0; i < numblocks; ++i) {
+      results.push_back(device.enqueue(&FullReducerShard<Self, Op, false>::run, self, i*blocksize, blocksize, reducer, &shards[i]));
+    }
+
+    FullReducerShard<Self, Op, false> finalShard;
+    if (numblocks * blocksize < num_coeffs) {
+      FullReducerShard<Self, Op, false>::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard);
+    } else {
+      finalShard.saccum = reducer.initialize();
+    }
+
+    for (Index i = 0; i < numblocks; ++i) {
+      wait_until_ready(results[i]);
+      delete results[i];
+    }
+
+    for (Index i = 0; i < numblocks; ++i) {
+      reducer.reduce(shards[i].saccum, &finalShard.saccum);
+    }
+    *output = reducer.finalize(finalShard.saccum);
+  }
+};
+
+template <typename Self, typename Op>
+struct FullReducer<Self, Op, ThreadPoolDevice, true> {
+  static const bool HasOptimizedImplementation = !Op::IsStateful;
+
+  // launch one reducer per thread and accumulate the result.
+  static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) {
+    typedef typename Self::Index Index;
+    const Index num_coeffs = array_prod(self.m_impl.dimensions());
+    const Index blocksize = std::floor<Index>(static_cast<float>(num_coeffs)/device.numThreads());
+    const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
+    eigen_assert(num_coeffs >= numblocks * blocksize);
+
+    std::vector<Notification*> results;
+    results.reserve(numblocks);
+    std::vector<FullReducerShard<Self, Op, true> > shards;
+    shards.resize(numblocks);
+    for (Index i = 0; i < numblocks; ++i) {
+      results.push_back(device.enqueue(&FullReducerShard<Self, Op, true>::run, self, i*blocksize, blocksize, reducer, &shards[i]));
+    }
+
+    FullReducerShard<Self, Op, true> finalShard;
+    if (numblocks * blocksize < num_coeffs) {
+      FullReducerShard<Self, Op, true>::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard);
+    } else {
+      finalShard.paccum = reducer.template initializePacket<typename Self::PacketReturnType>();
+      finalShard.saccum = reducer.initialize();
+    }
+
+    for (Index i = 0; i < numblocks; ++i) {
+      wait_until_ready(results[i]);
+      delete results[i];
+    }
+
+    for (Index i = 0; i < numblocks; ++i) {
+      reducer.reducePacket(shards[i].paccum, &finalShard.paccum);
+      reducer.reduce(shards[i].saccum, &finalShard.saccum);
+    }
+
+    *output = reducer.finalizeBoth(finalShard.saccum, finalShard.paccum);
+  }
+};
+#endif
+
+
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+// Full reducers for GPU, don't vectorize for now
+
+// Reducer function that enables multiple cuda thread to safely accumulate at the same
+// output address. It basically reads the current value of the output variable, and
+// attempts to update it with the new value. If in the meantime another cuda thread
+// updated the content of the output address it will try again.
+template <typename T, typename R>
+__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
+#if __CUDA_ARCH__ >= 300
+  if (sizeof(T) == 4)
+  {
+    unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
+    unsigned int newval = oldval;
+    reducer.reduce(accum, reinterpret_cast<T*>(&newval));
+    if (newval == oldval) {
+      return;
+    }
+    unsigned int readback;
+    while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
+      oldval = readback;
+      newval = oldval;
+      reducer.reduce(accum, reinterpret_cast<T*>(&newval));
+      if (newval == oldval) {
+        return;
+      }
+    }
+  }
+  else if (sizeof(T) == 8) {
+    unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
+    unsigned long long newval = oldval;
+    reducer.reduce(accum, reinterpret_cast<T*>(&newval));
+    if (newval == oldval) {
+      return;
+    }
+    unsigned long long readback;
+    while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
+      oldval = readback;
+      newval = oldval;
+      reducer.reduce(accum, reinterpret_cast<T*>(&newval));
+      if (newval == oldval) {
+        return;
+      }
+    }
+  }
+  else {
+    assert(0 && "Wordsize not supported");
+  }
+#else
+  assert(0 && "Shouldn't be called on unsupported device");
+#endif
+}
+
+template <typename T>
+__device__ inline void atomicReduce(T* output, T accum, SumReducer<T>&) {
+#if __CUDA_ARCH__ >= 300
+  atomicAdd(output, accum);
+#else
+  assert(0 && "Shouldn't be called on unsupported device");
+#endif
+}
+
+template <int BlockSize, int NumPerThread, typename Self,
+          typename Reducer, typename Index>
+__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
+                                    typename Self::CoeffReturnType* output) {
+  const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
+
+  if (first_index == 0) {
+    *output = reducer.initialize();
+  }
+
+  typename Self::CoeffReturnType accum = reducer.initialize();
+  for (Index i = 0; i < NumPerThread; ++i) {
+    const Index index = first_index + i * BlockSize;
+    if (index >= num_coeffs) {
+      break;
+    }
+    typename Self::CoeffReturnType val = input.m_impl.coeff(index);
+    reducer.reduce(val, &accum);
+  }
+
+  for (int offset = warpSize/2; offset > 0; offset /= 2) {
+    reducer.reduce(__shfl_down(accum, offset), &accum);
+  }
+
+  if ((threadIdx.x & (warpSize - 1)) == 0) {
+    atomicReduce(output, accum, reducer);
+  }
+}
+
+
+template <typename Self, typename Op, bool Vectorizable>
+struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
+  // Unfortunately nvidia doesn't support well exotic types such as complex,
+  // so reduce the scope of the optimized version of the code to the simple case
+  // of floats.
+  static const bool HasOptimizedImplementation = !Op::IsStateful &&
+                                                 internal::is_same<typename Self::CoeffReturnType, float>::value;
+
+  template <typename OutputType>
+  static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
+    assert(false && "Should only be called on floats");
+  }
+
+  static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) {
+    typedef typename Self::Index Index;
+
+    const Index num_coeffs = array_prod(self.m_impl.dimensions());
+    const int block_size = 256;
+    const int num_per_thread = 128;
+    const int num_blocks = std::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread));
+    LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread>),
+                       num_blocks, block_size, 0, device, reducer, self, num_coeffs, output);
+  }
+};
+
+#endif
+
+
+template <typename Self, typename Op,
+          bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
+class BlockReducer {
+ public:
+  typedef typename Self::Index Index;
+  typedef typename Self::Scalar Scalar;
+  typedef typename Self::CoeffReturnType CoeffReturnType;
+  explicit BlockReducer(const Op& reducer) : op_(reducer) {
+    accum_ = op_.initialize();
+  }
+  void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
+    for (Index i = 0; i < num_values_to_reduce; ++i) {
+      op_.reduce(data[index + i], &accum_);
+    }
+  }
+  CoeffReturnType Finalize() {
+    return op_.finalize(accum_);
+  }
+
+ private:
+  CoeffReturnType accum_;
+  Op op_;
+};
+
+
+template <typename Self, typename Op>
+class BlockReducer<Self, Op, true> {
+ public:
+  typedef typename Self::Index Index;
+  typedef typename Self::Scalar Scalar;
+  typedef typename Self::CoeffReturnType CoeffReturnType;
+  typedef typename Self::PacketReturnType PacketReturnType;
+  explicit BlockReducer(const Op& reducer) : op_(reducer) {
+    vaccum_ = op_.template initializePacket<PacketReturnType>();
+    accum_ = op_.initialize();
+  }
+  void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
+    const int packet_size = internal::unpacket_traits<PacketReturnType>::size;
+    const typename Self::Index vectorized_size = (num_values_to_reduce /
+                                                  packet_size) * packet_size;
+    for (typename Self::Index i = 0; i < vectorized_size; i += packet_size) {
+      op_.reducePacket(internal::ploadt<PacketReturnType, Unaligned>(
+                           &data[index + i]), &vaccum_);
+    }
+
+    for (typename Self::Index i = vectorized_size;
+         i < num_values_to_reduce; ++i) {
+      op_.reduce(data[index + i], &accum_);
+    }
+  }
+  typename Self::CoeffReturnType Finalize() {
+    return op_.finalizeBoth(accum_, vaccum_);
+  }
+
+ private:
+  typename Self::PacketReturnType vaccum_;
+  typename Self::CoeffReturnType accum_;
+  Op op_;
+};
+
 } // end namespace internal

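The ThreadPoolDevice specializations above follow a plain shard-and-combine pattern: cut the coefficient range into roughly equal blocks, reduce each block into a per-shard accumulator, then fold the shard results into a final accumulator. A minimal CPU-only sketch of that pattern using std::thread (standing in for the Eigen thread pool; none of the Eigen types are used) looks like this:

#include <iostream>
#include <numeric>
#include <thread>
#include <vector>

// Shard-and-combine sum: each worker reduces one block, the caller combines
// the per-shard partial sums after joining the workers.
float parallel_sum(const std::vector<float>& data, int num_threads) {
  const std::size_t blocksize = data.size() / num_threads;
  std::vector<float> partial(num_threads, 0.0f);
  std::vector<std::thread> workers;
  for (int i = 0; i < num_threads; ++i) {
    const std::size_t begin = i * blocksize;
    const std::size_t end = (i + 1 == num_threads) ? data.size() : begin + blocksize;
    workers.emplace_back([&, begin, end, i] {
      partial[i] = std::accumulate(data.begin() + begin, data.begin() + end, 0.0f);
    });
  }
  for (auto& w : workers) w.join();                                // wait for all shards
  return std::accumulate(partial.begin(), partial.end(), 0.0f);    // combine
}

int main() {
  std::vector<float> v(1000, 1.0f);
  std::cout << parallel_sum(v, 4) << "\n";  // prints 1000
  return 0;
}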
@@ -179,6 +523,7 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims)
     { }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer)
     { }

@@ -186,6 +531,7 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>
     const XprType& expression() const { return m_expr; }
     EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const Dims& dims() const { return m_dims; }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     const Op& reducer() const { return m_reducer; }

   protected:
@@ -201,10 +547,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
 {
   typedef TensorReductionOp<Op, Dims, ArgType> XprType;
   typedef typename XprType::Index Index;
-  static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
+  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
+  static const int NumInputDims = internal::array_size<InputDimensions>::value;
   static const int NumReducedDims = internal::array_size<Dims>::value;
   static const int NumOutputDims = (NumInputDims==NumReducedDims) ? 1 : NumInputDims - NumReducedDims;
-  typedef DSizes<Index, NumOutputDims> Dimensions;
+  typedef typename internal::conditional<NumInputDims==NumReducedDims, Sizes<1>, DSizes<Index, NumOutputDims> >::type Dimensions;
   typedef typename XprType::Scalar Scalar;
   typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> Self;
   static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
@@ -218,9 +565,10 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>

   static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
   static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
+  static const bool RunningFullReduction = (NumInputDims==NumReducedDims);

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_reducer(op.reducer())
+      : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
   {
     EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE);
     EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
@@ -238,17 +586,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
     }

     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
-    int outputIndex = 0;
-    int reduceIndex = 0;
-    for (int i = 0; i < NumInputDims; ++i) {
-      if (reduced[i]) {
-        m_reducedDims[reduceIndex] = input_dims[i];
-        ++reduceIndex;
-      } else {
-        m_dimensions[outputIndex] = input_dims[i];
-        ++outputIndex;
-      }
-    }
+    internal::DimInitializer<Dimensions>::run(input_dims, reduced, &m_dimensions, &m_reducedDims);

     // Precompute output strides.
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -277,8 +615,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
       }
     }

-    outputIndex = 0;
-    reduceIndex = 0;
+    int outputIndex = 0;
+    int reduceIndex = 0;
     for (int i = 0; i < NumInputDims; ++i) {
       if (reduced[i]) {
         m_reducedStrides[reduceIndex] = input_strides[i];
@@ -291,27 +629,50 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>

     // Special case for full reductions
     if (NumInputDims == NumReducedDims) {
-      m_dimensions[0] = 1;
+      eigen_assert(m_dimensions[0] == 1);
       m_preservedStrides[0] = internal::array_prod(input_dims);
     }
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
+  typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;
+
+  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
     m_impl.evalSubExprsIfNeeded(NULL);
+
+    // Use the FullReducer if possible.
+    if (RunningFullReduction && internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation &&
+        ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) ||
+         (internal::array_prod(m_impl.dimensions()) > 1024 * 1024))) {
+
+      bool need_assign = false;
+      if (!data) {
+        m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType)));
+        data = m_result;
+        need_assign = true;
+      }
+
+      Op reducer(m_reducer);
+      internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data);
+      return need_assign;
+    }
     return true;
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
+    if (m_result) {
+      m_device.deallocate(m_result);
+    }
   }

-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
-  typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
   {
+    if (RunningFullReduction && m_result) {
+      return *m_result;
+    }
     Op reducer(m_reducer);
     if (ReducingInnerMostDims) {
       const Index num_values_to_reduce =
@@ -372,6 +733,13 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
   template <int, typename, typename> friend struct internal::GenericDimReducer;
   template <typename, typename, bool> friend struct internal::InnerMostDimReducer;
   template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver;
+  template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer;
+#ifdef EIGEN_USE_THREADS
+  template <typename S, typename O, bool V> friend struct internal::FullReducerShard;
+#endif
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+  template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
+#endif

   // Returns the Index in the input tensor of the first value that needs to be
   // used to compute the reduction at output index "index".
@@ -392,7 +760,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
         startInput += idx * m_preservedStrides[i];
         index -= idx * m_outputStrides[i];
       }
+      if (PreservingInnerMostDims) {
+        eigen_assert(m_preservedStrides[0] == 1);
+        startInput += index;
+      } else {
       startInput += index * m_preservedStrides[0];
+      }
     } else {
       for (int i = 0; i < NumOutputDims - 1; ++i) {
         // This is index_i in the output tensor.
@@ -400,8 +773,13 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
         startInput += idx * m_preservedStrides[i];
         index -= idx * m_outputStrides[i];
       }
+      if (PreservingInnerMostDims) {
+        eigen_assert(m_preservedStrides[NumOutputDims - 1] == 1);
+        startInput += index;
+      } else {
       startInput += index * m_preservedStrides[NumOutputDims - 1];
       }
+    }
     return startInput;
   }

@@ -425,6 +803,16 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>

   // Operation to apply for computing the reduction.
   Op m_reducer;
+
+  // For full reductions
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+  static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
+#else
+  static const bool RunningOnGPU = false;
+#endif
+  CoeffReturnType* m_result;
+
+  const Device& m_device;
 };

 } // end namespace Eigen
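As a usage sketch (assuming the unsupported Eigen CXX11 Tensor module that this file belongs to): reducing over a subset of dimensions keeps the remaining ones, while reducing over every dimension yields a rank-0 tensor, which is the case the FullReducer path added above targets.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 3> t(2, 3, 4);
  t.setConstant(1.0f);

  // Reduce over dimensions 1 and 2: the result keeps dimension 0, shape (2).
  Eigen::array<int, 2> dims{{1, 2}};
  Eigen::Tensor<float, 1> partial = t.sum(dims);

  // Reduce over everything: rank-0 result (the full-reduction special case).
  Eigen::Tensor<float, 0> total = t.sum();

  std::cout << partial(0) << " " << total() << "\n";  // 12 24
  return 0;
}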
@@ -80,7 +80,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
     {
       typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -90,7 +90,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
     {
       typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -78,7 +78,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
     {
       typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -88,7 +88,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
     {
       typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -78,7 +78,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
     {
       typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
       return *this;
     }

@@ -88,7 +88,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
     {
       typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign;
       Assign assign(*this, other);
-      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
+      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
     }

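These hunks all drop the explicit third template argument of TensorExecutor (previously forced to false) and rely on the executor's default instead. A minimal sketch of the user-facing behaviour (again assuming the unsupported Eigen CXX11 Tensor module) is that a plain assignment and a device-targeted assignment of, for example, a reversed expression both route through the same executor entry point:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> a(4, 8), b(4, 8);
  a.setRandom();

  Eigen::array<bool, 2> rev{{true, false}};  // reverse the first dimension
  b = a.reverse(rev);                        // implicit DefaultDevice executor

  Eigen::DefaultDevice dev;
  b.device(dev) = a.reverse(rev);            // explicit device, same result
  return 0;
}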
677
unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
Normal file
677
unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h
Normal file
@ -0,0 +1,677 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
|
||||||
|
#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
|
||||||
|
#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
/** \class TensorVolumePatch
|
||||||
|
* \ingroup CXX11_Tensor_Module
|
||||||
|
*
|
||||||
|
* \brief Patch extraction specialized for processing of volumetric data.
|
||||||
|
* This assumes that the input has a least 4 dimensions ordered as follows:
|
||||||
|
* - channels
|
||||||
|
* - planes
|
||||||
|
* - rows
|
||||||
|
* - columns
|
||||||
|
* - (optional) additional dimensions such as time or batch size.
|
||||||
|
* Calling the volume patch code with patch_planes, patch_rows, and patch_cols
|
||||||
|
* is equivalent to calling the regular patch extraction code with parameters
|
||||||
|
* d, patch_planes, patch_rows, patch_cols, and 1 for all the additional
|
||||||
|
* dimensions.
|
||||||
|
*/
|
||||||
|
namespace internal {
|
||||||
|
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||||
|
struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType>
|
||||||
|
{
|
||||||
|
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||||
|
typedef traits<XprType> XprTraits;
|
||||||
|
typedef typename packet_traits<Scalar>::type Packet;
|
||||||
|
typedef typename XprTraits::StorageKind StorageKind;
|
||||||
|
typedef typename XprTraits::Index Index;
|
||||||
|
typedef typename XprType::Nested Nested;
|
||||||
|
typedef typename remove_reference<Nested>::type _Nested;
|
||||||
|
static const int NumDimensions = XprTraits::NumDimensions + 1;
|
||||||
|
static const int Layout = XprTraits::Layout;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||||
|
struct eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Eigen::Dense>
|
||||||
|
{
|
||||||
|
typedef const TensorVolumePatchOp<Planes, Rows, Cols, XprType>& type;
|
||||||
|
};
|
||||||
|
|
||||||
|
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||||
|
struct nested<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, 1, typename eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType> >::type>
|
||||||
|
{
|
||||||
|
typedef TensorVolumePatchOp<Planes, Rows, Cols, XprType> type;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||||
|
class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, ReadOnlyAccessors>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Scalar Scalar;
|
||||||
|
typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Packet Packet;
|
||||||
|
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||||
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
|
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||||
|
typedef typename Eigen::internal::nested<TensorVolumePatchOp>::type Nested;
|
||||||
|
typedef typename Eigen::internal::traits<TensorVolumePatchOp>::StorageKind StorageKind;
|
||||||
|
typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Index Index;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
|
||||||
|
DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
|
||||||
|
DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
|
||||||
|
DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
|
||||||
|
PaddingType padding_type, Scalar padding_value)
|
||||||
|
: m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
|
||||||
|
m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
|
||||||
|
m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
|
||||||
|
m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
|
||||||
|
m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
|
||||||
|
m_padding_type(padding_type), m_padding_value(padding_value) {}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
|
||||||
|
DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
|
||||||
|
DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
|
||||||
|
DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
|
||||||
|
DenseIndex padding_top_z, DenseIndex padding_bottom_z,
|
||||||
|
DenseIndex padding_top, DenseIndex padding_bottom,
|
||||||
|
DenseIndex padding_left, DenseIndex padding_right,
|
||||||
|
Scalar padding_value)
|
||||||
|
: m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
|
||||||
|
m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
|
||||||
|
m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
|
||||||
|
m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
|
||||||
|
m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
|
||||||
|
m_padding_left(padding_left), m_padding_right(padding_right),
|
||||||
|
m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex patch_planes() const { return m_patch_planes; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex patch_rows() const { return m_patch_rows; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex patch_cols() const { return m_patch_cols; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex plane_strides() const { return m_plane_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex row_strides() const { return m_row_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex col_strides() const { return m_col_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex in_plane_strides() const { return m_in_plane_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex in_row_strides() const { return m_in_row_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex in_col_strides() const { return m_in_col_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex row_inflate_strides() const { return m_row_inflate_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex col_inflate_strides() const { return m_col_inflate_strides; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
bool padding_explicit() const { return m_padding_explicit; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex padding_top_z() const { return m_padding_top_z; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex padding_bottom_z() const { return m_padding_bottom_z; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex padding_top() const { return m_padding_top; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex padding_bottom() const { return m_padding_bottom; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex padding_left() const { return m_padding_left; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
DenseIndex padding_right() const { return m_padding_right; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
PaddingType padding_type() const { return m_padding_type; }
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
Scalar padding_value() const { return m_padding_value; }
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||||
|
expression() const { return m_xpr; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
typename XprType::Nested m_xpr;
|
||||||
|
const DenseIndex m_patch_planes;
|
||||||
|
const DenseIndex m_patch_rows;
|
||||||
|
const DenseIndex m_patch_cols;
|
||||||
|
const DenseIndex m_plane_strides;
|
||||||
|
const DenseIndex m_row_strides;
|
||||||
|
const DenseIndex m_col_strides;
|
||||||
|
const DenseIndex m_in_plane_strides;
|
||||||
|
const DenseIndex m_in_row_strides;
|
||||||
|
const DenseIndex m_in_col_strides;
|
||||||
|
const DenseIndex m_plane_inflate_strides;
|
||||||
|
const DenseIndex m_row_inflate_strides;
|
||||||
|
const DenseIndex m_col_inflate_strides;
|
||||||
|
const bool m_padding_explicit;
|
||||||
|
const DenseIndex m_padding_top_z;
|
||||||
|
const DenseIndex m_padding_bottom_z;
|
||||||
|
const DenseIndex m_padding_top;
|
||||||
|
const DenseIndex m_padding_bottom;
|
||||||
|
const DenseIndex m_padding_left;
|
||||||
|
const DenseIndex m_padding_right;
|
||||||
|
const PaddingType m_padding_type;
|
||||||
|
const Scalar m_padding_value;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Eval as rvalue
|
||||||
|
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device>
|
||||||
|
struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
|
||||||
|
{
|
||||||
|
typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType;
|
||||||
|
typedef typename XprType::Index Index;
|
||||||
|
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||||
|
static const int NumDims = NumInputDims + 1;
|
||||||
|
typedef DSizes<Index, NumDims> Dimensions;
|
||||||
|
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
IsAligned = false,
|
||||||
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
|
BlockAccess = false,
|
||||||
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
|
CoordAccess = NumDims == 6,
|
||||||
|
};
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||||
|
: m_impl(op.expression(), device)
|
||||||
|
{
|
||||||
|
EIGEN_STATIC_ASSERT(NumDims >= 5, YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||||
|
|
||||||
|
m_paddingValue = op.padding_value();
|
||||||
|
|
||||||
|
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||||
|
|
||||||
|
// Cache a few variables.
|
||||||
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
|
m_inputDepth = input_dims[0];
|
||||||
|
m_inputPlanes = input_dims[1];
|
||||||
|
m_inputRows = input_dims[2];
|
||||||
|
m_inputCols = input_dims[3];
|
||||||
|
} else {
|
||||||
|
m_inputDepth = input_dims[NumInputDims-1];
|
||||||
|
m_inputPlanes = input_dims[NumInputDims-2];
|
||||||
|
m_inputRows = input_dims[NumInputDims-3];
|
||||||
|
m_inputCols = input_dims[NumInputDims-4];
|
||||||
|
}
|
||||||
|
|
||||||
|
m_plane_strides = op.plane_strides();
|
||||||
|
m_row_strides = op.row_strides();
|
||||||
|
m_col_strides = op.col_strides();
|
||||||
|
|
||||||
|
// Input strides and effective input/patch size
|
||||||
|
m_in_plane_strides = op.in_plane_strides();
|
||||||
|
m_in_row_strides = op.in_row_strides();
|
||||||
|
m_in_col_strides = op.in_col_strides();
|
||||||
|
m_plane_inflate_strides = op.plane_inflate_strides();
|
||||||
|
m_row_inflate_strides = op.row_inflate_strides();
|
||||||
|
m_col_inflate_strides = op.col_inflate_strides();
|
||||||
|
|
||||||
|
// The "effective" spatial size after inflating data with zeros.
|
||||||
|
m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1;
|
||||||
|
m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1;
|
||||||
|
m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1;
|
||||||
|
m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1);
|
||||||
|
m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1);
|
||||||
|
m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1);
|
||||||
|
|
||||||
|
if (op.padding_explicit()) {
|
||||||
|
m_outputPlanes = numext::ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides));
|
||||||
|
m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
|
||||||
|
m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
|
||||||
|
m_planePaddingTop = op.padding_top_z();
|
||||||
|
m_rowPaddingTop = op.padding_top();
|
||||||
|
m_colPaddingLeft = op.padding_left();
|
||||||
|
} else {
|
||||||
|
// Computing padding from the type
|
||||||
|
switch (op.padding_type()) {
|
||||||
|
case PADDING_VALID:
|
||||||
|
m_outputPlanes = numext::ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides));
|
||||||
|
m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
|
||||||
|
m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
|
||||||
|
m_planePaddingTop = 0;
|
||||||
|
m_rowPaddingTop = 0;
|
||||||
|
m_colPaddingLeft = 0;
|
||||||
|
break;
|
||||||
|
case PADDING_SAME: {
|
||||||
|
m_outputPlanes = numext::ceil(m_input_planes_eff / static_cast<float>(m_plane_strides));
|
||||||
|
m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides));
|
||||||
|
m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides));
|
||||||
|
const Index dz = m_outputPlanes * m_plane_strides + m_patch_planes_eff - 1 - m_input_planes_eff;
|
||||||
|
const Index dy = m_outputRows * m_row_strides + m_patch_rows_eff - 1 - m_input_rows_eff;
|
||||||
|
const Index dx = m_outputCols * m_col_strides + m_patch_cols_eff - 1 - m_input_cols_eff;
|
||||||
|
m_planePaddingTop = dz - dz / 2;
|
||||||
|
m_rowPaddingTop = dy - dy / 2;
|
||||||
|
m_colPaddingLeft = dx - dx / 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
eigen_assert(false && "unexpected padding");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eigen_assert(m_outputRows > 0);
|
||||||
|
eigen_assert(m_outputCols > 0);
|
||||||
|
eigen_assert(m_outputPlanes > 0);
|
||||||
|
|
||||||
|
// Dimensions for result of extraction.
|
||||||
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
|
// ColMajor
|
||||||
|
// 0: depth
|
||||||
|
// 1: patch_planes
|
||||||
|
// 2: patch_rows
|
||||||
|
// 3: patch_cols
|
||||||
|
// 4: number of patches
|
||||||
|
// 5 and beyond: anything else (such as batch).
|
||||||
|
m_dimensions[0] = input_dims[0];
|
||||||
|
m_dimensions[1] = op.patch_planes();
|
||||||
|
m_dimensions[2] = op.patch_rows();
|
||||||
|
m_dimensions[3] = op.patch_cols();
|
||||||
|
m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols;
|
||||||
|
for (int i = 5; i < NumDims; ++i) {
|
||||||
|
m_dimensions[i] = input_dims[i-1];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// RowMajor
|
||||||
|
// NumDims-1: depth
|
||||||
|
// NumDims-2: patch_planes
|
||||||
|
// NumDims-3: patch_rows
|
||||||
|
// NumDims-4: patch_cols
|
||||||
|
// NumDims-5: number of patches
|
||||||
|
// NumDims-6 and beyond: anything else (such as batch).
|
||||||
|
m_dimensions[NumDims-1] = input_dims[NumInputDims-1];
|
||||||
|
m_dimensions[NumDims-2] = op.patch_planes();
|
||||||
|
m_dimensions[NumDims-3] = op.patch_rows();
|
||||||
|
m_dimensions[NumDims-4] = op.patch_cols();
|
||||||
|
m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols;
|
||||||
|
for (int i = NumDims-6; i >= 0; --i) {
|
||||||
|
m_dimensions[i] = input_dims[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strides for the output tensor.
|
||||||
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
|
m_rowStride = m_dimensions[1];
|
||||||
|
m_colStride = m_dimensions[2] * m_rowStride;
|
||||||
|
m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0];
|
||||||
|
m_otherStride = m_patchStride * m_dimensions[4];
|
||||||
|
} else {
|
||||||
|
m_rowStride = m_dimensions[NumDims-2];
|
||||||
|
m_colStride = m_dimensions[NumDims-3] * m_rowStride;
|
||||||
|
m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1];
|
||||||
|
m_otherStride = m_patchStride * m_dimensions[NumDims-5];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strides for navigating through the input tensor.
|
||||||
|
m_planeInputStride = m_inputDepth;
|
||||||
|
m_rowInputStride = m_inputDepth * m_inputPlanes;
|
||||||
|
m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes;
|
||||||
|
m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes;
|
||||||
|
|
||||||
|
m_outputPlanesRows = m_outputPlanes * m_outputRows;
|
||||||
|
|
||||||
|
// Fast representations of different variables.
|
||||||
|
m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
|
||||||
|
m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
|
||||||
|
m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
|
||||||
|
m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride);
|
||||||
|
m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides);
|
||||||
|
m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides);
|
||||||
|
m_fastInputPlaneStride = internal::TensorIntDivisor<Index>(m_plane_inflate_strides);
|
||||||
|
m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff);
|
||||||
|
m_fastOutputPlanes = internal::TensorIntDivisor<Index>(m_outputPlanes);
|
||||||
|
m_fastOutputPlanesRows = internal::TensorIntDivisor<Index>(m_outputPlanesRows);
|
||||||
|
|
||||||
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
|
m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]);
|
||||||
|
} else {
|
||||||
|
m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
|
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
|
||||||
|
m_impl.evalSubExprsIfNeeded(NULL);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
|
m_impl.cleanup();
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
||||||
|
{
|
||||||
|
// Patch index corresponding to the passed in index.
|
||||||
|
const Index patchIndex = index / m_fastPatchStride;
|
||||||
|
|
||||||
|
// Spatial offset within the patch. This has to be translated into 3D
|
||||||
|
// coordinates within the patch.
|
||||||
|
const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth;
|
||||||
|
|
||||||
|
// Batch, etc.
|
||||||
|
const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride;
|
||||||
|
const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride;
|
||||||
|
|
||||||
|
// Calculate column index in the input original tensor.
|
||||||
|
const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
|
||||||
|
const Index colOffset = patchOffset / m_fastColStride;
|
||||||
|
const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
|
||||||
|
const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
|
||||||
|
if (inputCol < 0 || inputCol >= m_input_cols_eff ||
|
||||||
|
((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
|
||||||
|
return Scalar(m_paddingValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate row index in the original input tensor.
|
||||||
|
const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
|
||||||
|
const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride;
|
||||||
|
const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
|
||||||
|
const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
|
||||||
|
if (inputRow < 0 || inputRow >= m_input_rows_eff ||
|
||||||
|
((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
|
||||||
|
return Scalar(m_paddingValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate plane index in the original input tensor.
|
||||||
|
const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
|
||||||
|
const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride;
|
||||||
|
const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop;
|
||||||
|
const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0);
|
||||||
|
if (inputPlane < 0 || inputPlane >= m_input_planes_eff ||
|
||||||
|
((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) {
|
||||||
|
return Scalar(m_paddingValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
|
||||||
|
const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
|
||||||
|
|
||||||
|
const Index inputIndex = depth +
|
||||||
|
origInputRow * m_rowInputStride +
|
||||||
|
origInputCol * m_colInputStride +
|
||||||
|
origInputPlane * m_planeInputStride +
|
||||||
|
otherIndex * m_otherInputStride;
|
||||||
|
|
||||||
|
return m_impl.coeff(inputIndex);
|
||||||
|
}

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const Index packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < dimensions().TotalSize());

    if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 ||
        m_in_plane_strides != 1 || m_plane_inflate_strides != 1) {
      return packetWithPossibleZero(index);
    }

    const Index indices[2] = {index, index + packetSize - 1};
    const Index patchIndex = indices[0] / m_fastPatchStride;
    if (patchIndex != indices[1] / m_fastPatchStride) {
      return packetWithPossibleZero(index);
    }
    const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride;
    eigen_assert(otherIndex == indices[1] / m_fastOtherStride);

    // Find the offset of the element wrt the location of the first element.
    const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth,
                                   (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth};

    const Index patch3DIndex = (NumDims == 5) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
    eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);

    const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
    const Index colOffsets[2] = {
      patchOffsets[0] / m_fastColStride,
      patchOffsets[1] / m_fastColStride};

    // Calculate col indices in the original input tensor.
    const Index inputCols[2] = {
      colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft,
      colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
    if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
    }

    if (inputCols[0] != inputCols[1]) {
      return packetWithPossibleZero(index);
    }

    const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
    const Index rowOffsets[2] = {
      (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride,
      (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride};
    eigen_assert(rowOffsets[0] <= rowOffsets[1]);
    // Calculate row indices in the original input tensor.
    const Index inputRows[2] = {
      rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop,
      rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};

    if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
    }

    if (inputRows[0] != inputRows[1]) {
      return packetWithPossibleZero(index);
    }

    const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
    const Index planeOffsets[2] = {
      patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride,
      patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride};
    eigen_assert(planeOffsets[0] <= planeOffsets[1]);
    const Index inputPlanes[2] = {
      planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop,
      planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop};

    if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) {
      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
    }

    if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) {
      // no padding
      const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
      const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
      const Index inputIndex = depth +
          inputRows[0] * m_rowInputStride +
          inputCols[0] * m_colInputStride +
          m_planeInputStride * inputPlanes[0] +
          otherIndex * m_otherInputStride;
      return m_impl.template packet<Unaligned>(inputIndex);
    }

    return packetWithPossibleZero(index);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

  Index planePaddingTop() const { return m_planePaddingTop; }
  Index rowPaddingTop() const { return m_rowPaddingTop; }
  Index colPaddingLeft() const { return m_colPaddingLeft; }
  Index outputPlanes() const { return m_outputPlanes; }
  Index outputRows() const { return m_outputRows; }
  Index outputCols() const { return m_outputCols; }
  Index userPlaneStride() const { return m_plane_strides; }
  Index userRowStride() const { return m_row_strides; }
  Index userColStride() const { return m_col_strides; }
  Index userInPlaneStride() const { return m_in_plane_strides; }
  Index userInRowStride() const { return m_in_row_strides; }
  Index userInColStride() const { return m_in_col_strides; }
  Index planeInflateStride() const { return m_plane_inflate_strides; }
  Index rowInflateStride() const { return m_row_inflate_strides; }
  Index colInflateStride() const { return m_col_inflate_strides; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
  {
    // ColMajor
    // 0: depth, 1: patch_planes, 2: patch_rows, 3: patch_cols, 4: number of patches, 5: batches
    // RowMajor
    // 0: batches, 1: number of patches, 2: patch_cols, 3: patch_rows, 4: patch_planes, 5: depth
    const Index patch3DIndex = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 4 : 1];
    const Index colOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 3 : 2];
    const Index rowOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 3];
    const Index planeOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 4];

    array<Index, NumDims-1> inputCoords;

    const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
    const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
    const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
    if (inputCol < 0 || inputCol >= m_input_cols_eff ||
        ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
      return Scalar(m_paddingValue);
    }

    const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
    const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
    const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
    if (inputRow < 0 || inputRow >= m_input_rows_eff ||
        ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
      return Scalar(m_paddingValue);
    }

    const Index planeIndex = patch3DIndex - colIndex * m_outputPlanesRows - rowIndex * m_outputRows;
    const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop;
    const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0);
    if (inputPlane < 0 || inputPlane >= m_input_planes_eff ||
        ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) {
      return Scalar(m_paddingValue);
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      inputCoords[0] = coords[0];  // depth
      inputCoords[1] = origInputPlane;
      inputCoords[2] = origInputRow;
      inputCoords[3] = origInputCol;
      inputCoords[4] = coords[5];  // batch
    } else {
      inputCoords[4] = coords[5];  // depth
      inputCoords[3] = origInputPlane;
      inputCoords[2] = origInputRow;
      inputCoords[1] = origInputCol;
      inputCoords[0] = coords[0];  // batch
    }
    if (TensorEvaluator<ArgType, Device>::CoordAccess) {
      return m_impl.coeff(inputCoords);
    } else {
      Index inputIndex;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        inputIndex =
          inputCoords[4] * m_otherInputStride +
          inputCoords[3] * m_colInputStride +
          inputCoords[2] * m_rowInputStride +
          inputCoords[1] * m_planeInputStride +
          inputCoords[0];
      } else {
        inputIndex =
          inputCoords[0] * m_otherInputStride +
          inputCoords[1] * m_colInputStride +
          inputCoords[2] * m_rowInputStride +
          inputCoords[3] * m_planeInputStride +
          inputCoords[4];
      }
      return m_impl.coeff(inputIndex);
    }
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  Dimensions m_dimensions;

  // Parameters passed to the constructor.
  Index m_plane_strides;
  Index m_row_strides;
  Index m_col_strides;

  Index m_outputPlanes;
  Index m_outputRows;
  Index m_outputCols;

  Index m_planePaddingTop;
  Index m_rowPaddingTop;
  Index m_colPaddingLeft;

  Index m_in_plane_strides;
  Index m_in_row_strides;
  Index m_in_col_strides;

  Index m_plane_inflate_strides;
  Index m_row_inflate_strides;
  Index m_col_inflate_strides;

  // Cached input size.
  Index m_inputDepth;
  Index m_inputPlanes;
  Index m_inputRows;
  Index m_inputCols;

  // Other cached variables.
  Index m_outputPlanesRows;

  // Effective input/patch post-inflation size.
  Index m_input_planes_eff;
  Index m_input_rows_eff;
  Index m_input_cols_eff;
  Index m_patch_planes_eff;
  Index m_patch_rows_eff;
  Index m_patch_cols_eff;

  // Strides for the output tensor.
  Index m_otherStride;
  Index m_patchStride;
  Index m_rowStride;
  Index m_colStride;

  // Strides for the input tensor.
  Index m_planeInputStride;
  Index m_rowInputStride;
  Index m_colInputStride;
  Index m_otherInputStride;
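
  // Precomputed fast divisors (an explanatory note added for readability, not part
  // of the original patch): TensorIntDivisor replaces the integer divisions in the
  // index arithmetic above with cheaper multiply/shift sequences, which matters on
  // devices where integer division is slow.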
  internal::TensorIntDivisor<Index> m_fastOtherStride;
  internal::TensorIntDivisor<Index> m_fastPatchStride;
  internal::TensorIntDivisor<Index> m_fastColStride;
  internal::TensorIntDivisor<Index> m_fastRowStride;
  internal::TensorIntDivisor<Index> m_fastInputPlaneStride;
  internal::TensorIntDivisor<Index> m_fastInputRowStride;
  internal::TensorIntDivisor<Index> m_fastInputColStride;
  internal::TensorIntDivisor<Index> m_fastInputColsEff;
  internal::TensorIntDivisor<Index> m_fastOutputPlanesRows;
  internal::TensorIntDivisor<Index> m_fastOutputPlanes;
  internal::TensorIntDivisor<Index> m_fastOutputDepth;

  Scalar m_paddingValue;

  TensorEvaluator<ArgType, Device> m_impl;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
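
A minimal usage sketch of the evaluator above (editorial addition, not part of the patch; the tensor sizes and variable names are illustrative). With the default ColMajor layout, extract_volume_patches(pz, py, px) turns a rank-5 tensor laid out as (depth, planes, rows, cols, batch) into a rank-6 tensor (depth, pz, py, px, number_of_patches, batch), filling out-of-range voxels with the padding value:

  Eigen::Tensor<float, 5> input(16, 8, 8, 8, 4);
  input.setRandom();
  Eigen::Tensor<float, 6> patches = input.extract_volume_patches(2, 2, 2);
  // patches.dimension(4) is the number of 2x2x2 patches extracted per batch entry.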

@@ -125,6 +125,7 @@ if(EIGEN_TEST_CXX11)
  ei_add_test(cxx11_tensor_padding "-std=c++0x")
  ei_add_test(cxx11_tensor_patch "-std=c++0x")
  ei_add_test(cxx11_tensor_image_patch "-std=c++0x")
  ei_add_test(cxx11_tensor_volume_patch "-std=c++0x")
  ei_add_test(cxx11_tensor_reduction "-std=c++0x")
  ei_add_test(cxx11_tensor_shuffling "-std=c++0x")
  ei_add_test(cxx11_tensor_striding "-std=c++0x")
@@ -136,10 +137,12 @@ if(EIGEN_TEST_CXX11)
  ei_add_test(cxx11_tensor_layout_swap "-std=c++0x")
  ei_add_test(cxx11_tensor_io "-std=c++0x")
  ei_add_test(cxx11_tensor_generator "-std=c++0x")
  ei_add_test(cxx11_tensor_custom_op "-std=c++0x")

  # These tests need nvcc
  # ei_add_test(cxx11_tensor_device "-std=c++0x")
  # ei_add_test(cxx11_tensor_cuda "-std=c++0x")
  # ei_add_test(cxx11_tensor_contract_cuda "-std=c++0x")
  # ei_add_test(cxx11_tensor_reduction_cuda "-std=c++0x")

endif()
@@ -354,7 +354,3 @@ void test_cxx11_meta()
  CALL_SUBTEST(test_array_zip_and_apply());
  CALL_SUBTEST(test_array_misc());
}

/*
 * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
 */
107  unsupported/test/cxx11_tensor_custom_op.cpp  Normal file
@@ -0,0 +1,107 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include "main.h"

#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;


struct InsertZeros {
  DSizes<DenseIndex, 2> dimensions(const Tensor<float, 2>& input) const {
    DSizes<DenseIndex, 2> result;
    result[0] = input.dimension(0) * 2;
    result[1] = input.dimension(1) * 2;
    return result;
  }

  template <typename Output, typename Device>
  void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const
  {
    array<DenseIndex, 2> strides{{2, 2}};
    output.stride(strides).device(device) = input;

    Eigen::DSizes<DenseIndex, 2> offsets(1,1);
    Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0)-1, output.dimension(1)-1);
    output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
  }
};

static void test_custom_unary_op()
{
  Tensor<float, 2> tensor(3,5);
  tensor.setRandom();

  Tensor<float, 2> result = tensor.customOp(InsertZeros());
  VERIFY_IS_EQUAL(result.dimension(0), 6);
  VERIFY_IS_EQUAL(result.dimension(1), 10);

  for (int i = 0; i < 6; i+=2) {
    for (int j = 0; j < 10; j+=2) {
      VERIFY_IS_EQUAL(result(i, j), tensor(i/2, j/2));
    }
  }
  for (int i = 1; i < 6; i+=2) {
    for (int j = 1; j < 10; j+=2) {
      VERIFY_IS_EQUAL(result(i, j), 0);
    }
  }
}


struct BatchMatMul {
  DSizes<DenseIndex, 3> dimensions(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2) const {
    DSizes<DenseIndex, 3> result;
    result[0] = input1.dimension(0);
    result[1] = input2.dimension(1);
    result[2] = input2.dimension(2);
    return result;
  }

  template <typename Output, typename Device>
  void eval(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2,
            Output& output, const Device& device) const
  {
    typedef Tensor<float, 3>::DimensionPair DimPair;
    array<DimPair, 1> dims({{DimPair(1, 0)}});
    for (int i = 0; i < output.dimension(2); ++i) {
      output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims);
    }
  }
};


static void test_custom_binary_op()
{
  Tensor<float, 3> tensor1(2,3,5);
  tensor1.setRandom();
  Tensor<float, 3> tensor2(3,7,5);
  tensor2.setRandom();

  Tensor<float, 3> result = tensor1.customOp(tensor2, BatchMatMul());
  for (int i = 0; i < 5; ++i) {
    typedef Tensor<float, 3>::DimensionPair DimPair;
    array<DimPair, 1> dims({{DimPair(1, 0)}});
    Tensor<float, 2> reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims);
    TensorRef<Tensor<float, 2>> val = result.chip<2>(i);
    for (int j = 0; j < 2; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(val(j, k), reference(j, k));
      }
    }
  }
}


void test_cxx11_tensor_custom_op()
{
  CALL_SUBTEST(test_custom_unary_op());
  CALL_SUBTEST(test_custom_binary_op());
}
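
As the two functors above illustrate, customOp accepts any functor that exposes a dimensions() method reporting the output shape and a templated eval() method that writes the output on the given device. A minimal sketch of that contract (editorial addition, not part of the patch; the functor name is illustrative):

  // Copies its input unchanged; the smallest functor satisfying the customOp contract.
  struct IdentityOp {
    DSizes<DenseIndex, 2> dimensions(const Tensor<float, 2>& input) const {
      DSizes<DenseIndex, 2> result;
      result[0] = input.dimension(0);
      result[1] = input.dimension(1);
      return result;
    }
    template <typename Output, typename Device>
    void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const {
      output.device(device) = input;
    }
  };
  // Usage: Tensor<float, 2> copy = some_tensor.customOp(IdentityOp());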
55  unsupported/test/cxx11_tensor_reduction_cuda.cpp  Normal file
@@ -0,0 +1,55 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda
#define EIGEN_USE_GPU

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>


template<int DataLayout>
static void test_full_reductions() {

  Eigen::GpuDevice gpu_device;

  const int num_rows = internal::random<int>(1024, 5*1024);
  const int num_cols = internal::random<int>(1024, 5*1024);

  Tensor<float, 2, DataLayout> in(num_rows, num_cols);
  in.setRandom();

  Tensor<float, 1, DataLayout> full_redux(1);
  full_redux = in.sum();

  std::size_t in_bytes = in.size() * sizeof(float);
  std::size_t out_bytes = full_redux.size() * sizeof(float);
  float* gpu_in_ptr = static_cast<float*>(gpu_device.allocate(in_bytes));
  float* gpu_out_ptr = static_cast<float*>(gpu_device.allocate(out_bytes));
  gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);

  TensorMap<Tensor<float, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
  TensorMap<Tensor<float, 1, DataLayout> > out_gpu(gpu_out_ptr, 1);

  out_gpu.device(gpu_device) = in_gpu.sum();

  Tensor<float, 1, DataLayout> full_redux_gpu(1);
  gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
  gpu_device.synchronize();

  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux(0), full_redux_gpu(0));
}

void test_cxx11_tensor_reduction_cuda() {
  CALL_SUBTEST(test_full_reductions<ColMajor>());
  CALL_SUBTEST(test_full_reductions<RowMajor>());
}
@@ -228,6 +228,29 @@ static void test_multithread_contraction_agrees_with_singlethread() {
}


template<int DataLayout>
static void test_multithreaded_reductions() {
  const int num_threads = internal::random<int>(3, 11);
  ThreadPool thread_pool(num_threads);
  Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);

  const int num_rows = internal::random<int>(13, 732);
  const int num_cols = internal::random<int>(13, 732);
  Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
  t1.setRandom();

  Tensor<float, 1, DataLayout> full_redux(1);
  full_redux = t1.sum();

  Tensor<float, 1, DataLayout> full_redux_tp(1);
  full_redux_tp.device(thread_pool_device) = t1.sum();

  // Check that the single threaded and the multi threaded reductions return
  // the same result.
  VERIFY_IS_APPROX(full_redux(0), full_redux_tp(0));
}


static void test_memcpy() {

  for (int i = 0; i < 5; ++i) {
@@ -271,6 +294,9 @@ void test_cxx11_tensor_thread_pool()
  CALL_SUBTEST(test_contraction_corner_cases<ColMajor>());
  CALL_SUBTEST(test_contraction_corner_cases<RowMajor>());

  CALL_SUBTEST(test_multithreaded_reductions<ColMajor>());
  CALL_SUBTEST(test_multithreaded_reductions<RowMajor>());

  CALL_SUBTEST(test_memcpy());

  CALL_SUBTEST(test_multithread_random());
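
A brief editorial note (not part of the patch): the same ThreadPoolDevice also evaluates partial reductions. A minimal sketch, reusing the names from test_multithreaded_reductions above and assuming the usual reduction-axis API:

  array<ptrdiff_t, 1> reduce_rows;
  reduce_rows[0] = 0;  // sum over the row dimension
  Tensor<float, 1, DataLayout> col_sums(num_cols);
  col_sums.device(thread_pool_device) = t1.sum(reduce_rows);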
112  unsupported/test/cxx11_tensor_volume_patch.cpp  Normal file
@@ -0,0 +1,112 @@
#include "main.h"

#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;

static void test_single_voxel_patch()
{
  Tensor<float, 5> tensor(4,2,3,5,7);
  tensor.setRandom();
  Tensor<float, 5, RowMajor> tensor_row_major = tensor.swap_layout();

  Tensor<float, 6> single_voxel_patch;
  single_voxel_patch = tensor.extract_volume_patches(1, 1, 1);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(0), 4);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(1), 1);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(2), 1);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(3), 1);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(4), 2 * 3 * 5);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(5), 7);

  Tensor<float, 6, RowMajor> single_voxel_patch_row_major;
  single_voxel_patch_row_major = tensor_row_major.extract_volume_patches(1, 1, 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(0), 7);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(1), 2 * 3 * 5);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(2), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(3), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(4), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(5), 4);

  for (int i = 0; i < tensor.size(); ++i) {
    VERIFY_IS_EQUAL(tensor.data()[i], single_voxel_patch.data()[i]);
    VERIFY_IS_EQUAL(tensor_row_major.data()[i], single_voxel_patch_row_major.data()[i]);
    VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]);
  }
}


static void test_entire_volume_patch()
{
  const int depth = 4;
  const int patch_z = 2;
  const int patch_y = 3;
  const int patch_x = 5;
  const int batch = 7;

  Tensor<float, 5> tensor(depth, patch_z, patch_y, patch_x, batch);
  tensor.setRandom();
  Tensor<float, 5, RowMajor> tensor_row_major = tensor.swap_layout();

  Tensor<float, 6> entire_volume_patch;
  entire_volume_patch = tensor.extract_volume_patches(patch_z, patch_y, patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(0), depth);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(1), patch_z);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(2), patch_y);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(3), patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(4), patch_z * patch_y * patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(5), batch);

  Tensor<float, 6, RowMajor> entire_volume_patch_row_major;
  entire_volume_patch_row_major = tensor_row_major.extract_volume_patches(patch_z, patch_y, patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(0), batch);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(2), patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(3), patch_y);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(4), patch_z);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(5), depth);

  const int dz = patch_z - 1;
  const int dy = patch_y - 1;
  const int dx = patch_x - 1;

  const int forward_pad_z = dz - dz / 2;
  const int forward_pad_y = dy - dy / 2;
  const int forward_pad_x = dx - dx / 2;

  for (int pz = 0; pz < patch_z; pz++) {
    for (int py = 0; py < patch_y; py++) {
      for (int px = 0; px < patch_x; px++) {
        const int patchId = pz + patch_z * (py + px * patch_y);
        for (int z = 0; z < patch_z; z++) {
          for (int y = 0; y < patch_y; y++) {
            for (int x = 0; x < patch_x; x++) {
              for (int b = 0; b < batch; b++) {
                for (int d = 0; d < depth; d++) {
                  float expected = 0.0f;
                  float expected_row_major = 0.0f;
                  const int eff_z = z - forward_pad_z + pz;
                  const int eff_y = y - forward_pad_y + py;
                  const int eff_x = x - forward_pad_x + px;
                  if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 &&
                      eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) {
                    expected = tensor(d, eff_z, eff_y, eff_x, b);
                    expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d);
                  }
                  VERIFY_IS_EQUAL(entire_volume_patch(d, z, y, x, patchId, b), expected);
                  VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major);
                }
              }
            }
          }
        }
      }
    }
  }
}

void test_cxx11_tensor_volume_patch()
{
  CALL_SUBTEST(test_single_voxel_patch());
  CALL_SUBTEST(test_entire_volume_patch());
}