Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-11 19:29:02 +08:00)

Merged eigen/eigen into default
This commit is contained in: commit 9115896590
@@ -83,22 +83,10 @@ template<typename Derived> class ArrayBase
#endif // not EIGEN_PARSED_BY_DOXYGEN

#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
* exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
* reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
* PlainObject or const PlainObject&.
*/
typedef Array<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainObject;
typedef typename Base::PlainObject PlainObject;

/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
#endif // not EIGEN_PARSED_BY_DOXYGEN

#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase
@@ -756,6 +756,26 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
}

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
Index dstRows = src.rows();
Index dstCols = src.cols();
if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
dst.resize(dstRows, dstCols);

// TODO check whether this is the right place to perform these checks:
EIGEN_STATIC_ASSERT_LVALUE(Dst)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)

Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
}

// forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);

@@ -783,7 +803,6 @@ struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());

src.evalTo(dst);
}
};
Eigen/src/Core/Assign_MKL.h: 262 changed lines (Normal file → Executable file)
@@ -1,6 +1,7 @@
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.

Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

@@ -37,17 +38,13 @@ namespace Eigen {

namespace internal {

template<typename Op> struct vml_call
{ enum { IsSupported = 0 }; };

template<typename Dst, typename Src, typename UnaryOp>
template<typename Dst, typename Src>
class vml_assign_traits
{
private:
enum {
DstHasDirectAccess = Dst::Flags & DirectAccessBit,
SrcHasDirectAccess = Src::Flags & DirectAccessBit,

StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
: int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
@@ -57,173 +54,118 @@ class vml_assign_traits
: int(Dst::MaxRowsAtCompileTime),
MaxSizeAtCompileTime = Dst::SizeAtCompileTime,

MightEnableVml = vml_call<UnaryOp>::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess
&& Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD,
MayEnableVml = MightEnableVml && LargeEnough,
MayLinearize = MayEnableVml && MightLinearize
LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
};
public:
enum {
Traversal = MayLinearize ? LinearVectorizedTraversal
: MayEnableVml ? InnerVectorizedTraversal
: DefaultTraversal
EnableVml = MightEnableVml && LargeEnough,
Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
};
};

template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling,
int VmlTraversal = vml_assign_traits<Derived1, Derived2, UnaryOp>::Traversal >
struct vml_assign_impl
: assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>
{
};

template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, InnerVectorizedTraversal>
{
typedef typename Derived1::Scalar Scalar;
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
{
// in case we want to (or have to) skip VML at runtime we can call:
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
const Index innerSize = dst.innerSize();
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer) {
const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) :
&(src.nestedExpression().coeffRef(0, outer));
Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer));
vml_call<UnaryOp>::run(src.functor(), innerSize, src_ptr, dst_ptr );
}
}
};

template<typename Derived1, typename Derived2, typename UnaryOp, int Traversal, int Unrolling>
struct vml_assign_impl<Derived1, Derived2, UnaryOp, Traversal, Unrolling, LinearVectorizedTraversal>
{
static inline void run(Derived1& dst, const CwiseUnaryOp<UnaryOp, Derived2>& src)
{
// in case we want to (or have to) skip VML at runtime we can call:
// assign_impl<Derived1,Eigen::CwiseUnaryOp<UnaryOp, Derived2>,Traversal,Unrolling,BuiltIn>::run(dst,src);
vml_call<UnaryOp>::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() );
}
};

// Macroses

#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \
template<typename Derived1, typename Derived2, typename UnaryOp> \
struct assign_impl<Derived1, Eigen::CwiseUnaryOp<UnaryOp, Derived2>, TRAVERSAL, UNROLLING, Specialized> { \
static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp<UnaryOp, Derived2> &src) { \
vml_assign_impl<Derived1,Derived2,UnaryOp,TRAVERSAL,UNROLLING>::run(dst, src); \
} \
};

EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling)
EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling)


#define EIGEN_PP_EXPAND(ARG) ARG
#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
#define EIGEN_MKL_VML_MODE VML_HA
#define EIGEN_VMLMODE_EXPAND_LA , VML_HA
#else
#define EIGEN_MKL_VML_MODE VML_LA
#define EIGEN_VMLMODE_EXPAND_LA , VML_LA
#endif

#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \
} \
#define EIGEN_VMLMODE_EXPAND__

#define EIGEN_VMLMODE_PREFIX_LA vm
#define EIGEN_VMLMODE_PREFIX__ v
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested> \
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
} else { \
const Index outerSize = dst.outerSize(); \
for(Index outer = 0; outer < outerSize; ++outer) { \
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
&(src.nestedExpression().coeffRef(0, outer)); \
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
} \
} \
} \
}; \


#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)

EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA)
// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _)

EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)

#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
template< typename DstXprType, typename SrcXprNested> \
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml,EIGENTYPE>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE> &/*func*/) { \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.functor().m_exponent); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
{ \
VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \
(VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \
} else { \
const Index outerSize = dst.outerSize(); \
for(Index outer = 0; outer < outerSize; ++outer) { \
const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
&(src.nestedExpression().coeffRef(0, outer)); \
EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
(VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \
} \
} \
} \
};

#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& /*func*/, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \
} \
};

#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \
template<> struct vml_call< scalar_##EIGENOP##_op<EIGENTYPE> > { \
enum { IsSupported = 1 }; \
static inline void run( const scalar_##EIGENOP##_op<EIGENTYPE>& func, \
int size, const EIGENTYPE* src, EIGENTYPE* dst) { \
EIGENTYPE exponent = func.m_exponent; \
MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \
VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \
(VMLTYPE*)dst, &vmlMode); \
} \
};

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP)


#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \
EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16)

#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP)


EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sinh, Sinh)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cosh, Cosh)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tanh, Tanh)
//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log10, Log10)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt)

EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(arg, Arg)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor)
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil)

// The vm*powx functions are not avaibale in the windows version of MKL.
#ifndef _WIN32
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16)
#endif

EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA)
EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)

} // end namespace internal
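The new Assignment specializations above choose between one linear VML call over the whole storage and one call per inner vector, based on vml_assign_traits. The following standalone sketch mirrors that dispatch without requiring MKL; unary_kernel and apply are hypothetical helpers added only for illustration, with std::sin standing in for a VML routine such as vdSin.

// Minimal sketch of the traversal dispatch used by the macro above (not Eigen or MKL code).
#include <cstddef>
#include <cmath>

// Applies the unary op to n contiguous values; stands in for a VML kernel.
static void unary_kernel(std::size_t n, const double* src, double* dst) {
  for (std::size_t i = 0; i < n; ++i) dst[i] = std::sin(src[i]);
}

// One big call when both sides are contiguous (the "LinearTraversal" branch),
// otherwise one call per inner vector (the "DefaultTraversal" branch).
void apply(std::size_t rows, std::size_t cols,
           const double* src, std::size_t srcOuterStride,
           double* dst, std::size_t dstOuterStride) {
  const bool linear = (srcOuterStride == rows) && (dstOuterStride == rows); // column-major, no padding
  if (linear) {
    unary_kernel(rows * cols, src, dst);
  } else {
    for (std::size_t c = 0; c < cols; ++c)   // per-column fallback for strided storage
      unary_kernel(rows, src + c * srcOuterStride, dst + c * dstOuterStride);
  }
}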
@@ -113,10 +113,10 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
{
return CwiseNullaryOp<CustomNullaryOp, Derived>(rows, cols, func);
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
}

/** \returns an expression of a matrix defined by a custom functor \a func
@@ -139,12 +139,12 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, Derived>(1, size, func);
else return CwiseNullaryOp<CustomNullaryOp, Derived>(size, 1, func);
if(RowsAtCompileTime == 1) return CwiseNullaryOp<CustomNullaryOp, PlainObject>(1, size, func);
else return CwiseNullaryOp<CustomNullaryOp, PlainObject>(size, 1, func);
}

/** \returns an expression of a matrix defined by a custom functor \a func
@@ -158,10 +158,10 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
*/
template<typename Derived>
template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, Derived>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
{
return CwiseNullaryOp<CustomNullaryOp, Derived>(RowsAtCompileTime, ColsAtCompileTime, func);
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
}

/** \returns an expression of a constant matrix of value \a value
@@ -49,6 +49,8 @@ template<typename Derived> class DenseBase
public:
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator*;
using internal::special_scalar_op_base<Derived,typename internal::traits<Derived>::Scalar,
typename NumTraits<typename internal::traits<Derived>::Scalar>::Real>::operator/;


/** Inner iterator type to iterate over the coefficients of a row or column.
@@ -178,6 +180,35 @@ template<typename Derived> class DenseBase
};

enum { IsPlainObjectBase = 0 };

/** The plain matrix type corresponding to this expression.
* \sa PlainObject */
typedef Matrix<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainMatrix;

/** The plain array type corresponding to this expression.
* \sa PlainObject */
typedef Array<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainArray;

/** \brief The plain matrix or array type corresponding to this expression.
*
* This is not necessarily exactly the return type of eval(). In the case of plain matrices,
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
* that the return type of eval() is either PlainObject or const PlainObject&.
*/
typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
PlainMatrix, PlainArray>::type PlainObject;

/** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */
@@ -237,13 +268,12 @@ template<typename Derived> class DenseBase
}

#ifndef EIGEN_PARSED_BY_DOXYGEN

/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,PlainObject> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,PlainObject> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;

@@ -322,13 +352,13 @@ template<typename Derived> class DenseBase
LinSpaced(const Scalar& low, const Scalar& high);

template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
static const CwiseNullaryOp<CustomNullaryOp, Derived>
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
static const CwiseNullaryOp<CustomNullaryOp, Derived>
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
NullaryExpr(Index size, const CustomNullaryOp& func);
template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
static const CwiseNullaryOp<CustomNullaryOp, Derived>
static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
NullaryExpr(const CustomNullaryOp& func);

EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
@@ -466,9 +496,10 @@ template<typename Derived> class DenseBase
ConstColwiseReturnType colwise() const;
ColwiseReturnType colwise();

static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
static const RandomReturnType Random(Index rows, Index cols);
static const RandomReturnType Random(Index size);
static const RandomReturnType Random();

template<typename ThenDerived,typename ElseDerived>
const Select<Derived,ThenDerived,ElseDerived>
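With the PlainMatrix/PlainArray/PlainObject typedefs introduced above, the plain type of any dense expression follows its XprKind. A minimal sketch of what that means for user code, assuming an Eigen version containing this change:

// Sketch only: checks that matrix expressions evaluate to a Matrix and array expressions to an Array.
#include <Eigen/Core>
#include <type_traits>

int main() {
  Eigen::MatrixXd m(2, 2);
  Eigen::ArrayXXd a(2, 2);
  typedef std::decay<decltype(m + m)>::type MatExpr;  // some CwiseBinaryOp over matrices
  typedef std::decay<decltype(a + a)>::type ArrExpr;  // some CwiseBinaryOp over arrays
  static_assert(std::is_same<MatExpr::PlainObject, Eigen::MatrixXd>::value,
                "PlainObject of a matrix expression is a Matrix");
  static_assert(std::is_same<ArrExpr::PlainObject, Eigen::ArrayXXd>::value,
                "PlainObject of an array expression is an Array");
  return 0;
}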
@@ -349,6 +349,7 @@ struct hypot_retval
template<typename OldType, typename NewType>
struct cast_impl
{
EIGEN_DEVICE_FUNC
static inline NewType run(const OldType& x)
{
return static_cast<NewType>(x);
@@ -360,6 +361,7 @@ struct cast_impl
template<typename OldType, typename NewType>
inline NewType cast(const OldType& x)
{
EIGEN_DEVICE_FUNC
return cast_impl<OldType, NewType>::run(x);
}
@@ -81,6 +81,7 @@ template<typename Derived> class MatrixBase
using Base::operator*=;
using Base::operator/=;
using Base::operator*;
using Base::operator/;

typedef typename Base::CoeffReturnType CoeffReturnType;
typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType;
@@ -101,23 +102,11 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC
inline Index diagonalSize() const { return (std::min)(rows(),cols()); }

/** \brief The plain matrix type corresponding to this expression.
*
* This is not necessarily exactly the return type of eval(). In the case of plain matrices,
* the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
* that the return type of eval() is either PlainObject or const PlainObject&.
*/
typedef Matrix<typename internal::traits<Derived>::Scalar,
internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime,
AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime
> PlainObject;
typedef typename Base::PlainObject PlainObject;

#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
/** \internal the return type of MatrixBase::adjoint() */
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
@@ -126,7 +115,7 @@ template<typename Derived> class MatrixBase
/** \internal Return type of eigenvalues() */
typedef Matrix<std::complex<RealScalar>, internal::traits<Derived>::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType;
/** \internal the return type of identity */
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,Derived> IdentityReturnType;
typedef CwiseNullaryOp<internal::scalar_identity_op<Scalar>,PlainObject> IdentityReturnType;
/** \internal the return type of unit vectors */
typedef Block<const CwiseNullaryOp<internal::scalar_identity_op<Scalar>, SquareMatrixType>,
internal::traits<Derived>::RowsAtCompileTime,
Eigen/src/Core/ProductEvaluators.h: 5 changed lines (Normal file → Executable file)
@@ -751,7 +751,6 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
using Base::m_diagImpl;
using Base::m_matImpl;
using Base::coeff;
using Base::packet_impl;
typedef typename Base::Scalar Scalar;
typedef typename Base::PacketScalar PacketScalar;

@@ -776,7 +775,8 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
{
// NVCC complains about template keyword, so we disable this function in CUDA mode
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
// See also similar calls below.
return this->template packet_impl<LoadMode>(row,col, row,
typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
}
@@ -798,7 +798,6 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
using Base::m_diagImpl;
using Base::m_matImpl;
using Base::coeff;
using Base::packet_impl;
typedef typename Base::Scalar Scalar;
typedef typename Base::PacketScalar PacketScalar;
@@ -53,7 +53,7 @@ struct functor_traits<scalar_random_op<Scalar> >
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
inline const typename DenseBase<Derived>::RandomReturnType
DenseBase<Derived>::Random(Index rows, Index cols)
{
return NullaryExpr(rows, cols, internal::scalar_random_op<Scalar>());
@@ -84,7 +84,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
inline const typename DenseBase<Derived>::RandomReturnType
DenseBase<Derived>::Random(Index size)
{
return NullaryExpr(size, internal::scalar_random_op<Scalar>());
@@ -110,7 +110,7 @@ DenseBase<Derived>::Random(Index size)
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
*/
template<typename Derived>
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
inline const typename DenseBase<Derived>::RandomReturnType
DenseBase<Derived>::Random()
{
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op<Scalar>());
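Only the declared return type of Random() changes here (RandomReturnType, a CwiseNullaryOp over PlainObject instead of Derived); user-facing usage stays the same. A minimal usage sketch, not taken from the commit:

#include <Eigen/Dense>
#include <iostream>

int main() {
  // Random() is a lazy nullary expression; it is evaluated when assigned to a plain object.
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 3);  // coefficients uniform in [-1, 1]
  Eigen::VectorXd v = Eigen::VectorXd::Random(5);
  std::cout << m << "\n\n" << v << std::endl;
  return 0;
}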
@@ -70,10 +70,6 @@ template<typename MatrixType, int Direction> class Reverse
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
using Base::IsRowMajor;

// next line is necessary because otherwise const version of operator()
// is hidden by non-const version defined in this file
using Base::operator();

protected:
enum {
PacketSize = internal::packet_traits<Scalar>::size,
@@ -101,69 +97,6 @@ template<typename MatrixType, int Direction> class Reverse
return -m_matrix.innerStride();
}

EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col)
{
eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return coeffRef(row, col);
}

EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}

EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const
{
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}

EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(m_matrix.size() - index - 1);
}

EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
{
return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
}

EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index)
{
eigen_assert(index >= 0 && index < m_matrix.size());
return coeffRef(index);
}

template<int LoadMode>
inline const PacketScalar packet(Index row, Index col) const
{
return reverse_packet::run(m_matrix.template packet<LoadMode>(
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
}

template<int LoadMode>
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
reverse_packet::run(x));
}

template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
}

template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
}

EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const
{
@@ -157,6 +157,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const
{
using std::sqrt;
using std::abs;
const Index blockSize = 4096;
RealScalar scale(0);
RealScalar invScale(1);
@@ -164,12 +165,18 @@ MatrixBase<Derived>::stableNorm() const
enum {
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
};
typedef typename internal::conditional<Alignment, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, Aligned>,
typename Base::ConstSegmentReturnType>::type SegmentWrapper;
Index n = size();

if(n==1)
return abs(this->coeff(0));

Index bi = internal::first_aligned(derived());
if (bi>0)
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize)
internal::stable_norm_kernel(this->segment(bi,numext::mini(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
internal::stable_norm_kernel(SegmentWrapper(this->segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
return scale * sqrt(ssq);
}
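stableNorm() processes the vector blockwise with rescaling so that the squared sum neither overflows nor underflows, which plain norm() can do. A small standalone illustration, not part of the commit:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::VectorXd v = Eigen::VectorXd::Constant(4, 1e200);
  std::cout << v.norm()       << "\n";  // inf: the sum of squares (4e400) overflows double
  std::cout << v.stableNorm() << "\n";  // ~2e200: computed with scaling, no overflow
  return 0;
}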
@@ -392,6 +392,18 @@ template<typename Scalar>
struct functor_traits<scalar_quotient1_op<Scalar> >
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };

template<typename Scalar1, typename Scalar2>
struct scalar_quotient2_op {
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; }
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
};
template<typename Scalar1,typename Scalar2>
struct functor_traits<scalar_quotient2_op<Scalar1,Scalar2> >
{ enum { Cost = 2 * NumTraits<Scalar1>::MulCost, PacketAccess = false }; };

// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
// where the mixing of different types is handled by scalar_product_traits
// In particular, real * complex<real> is allowed.
Eigen/src/Core/products/GeneralMatrixVector_MKL.h: 25 changed lines (Normal file → Executable file)
@@ -46,38 +46,37 @@ namespace internal {

// gemv specialization

template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv :
general_matrix_vector_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,ConjugateRhs,BuiltIn> {};
template<typename Index, typename LhsScalar, int StorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs>
struct general_matrix_vector_product_gemv;

#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,Specialized> { \
static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
const const_blas_data_mapper<Scalar,Index,ColMajor> &lhs, \
const const_blas_data_mapper<Scalar,Index,RowMajor> &rhs, \
Scalar* res, Index resIncr, Scalar alpha) \
{ \
if (ConjugateLhs) { \
general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,BuiltIn>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,ConjugateRhs,BuiltIn>::run( \
rows, cols, lhs, rhs, res, resIncr, alpha); \
} else { \
general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
} \
} \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
struct general_matrix_vector_product<Index,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>,ConjugateRhs,Specialized> { \
static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
const const_blas_data_mapper<Scalar,Index,RowMajor> &lhs, \
const const_blas_data_mapper<Scalar,Index,ColMajor> &rhs, \
Scalar* res, Index resIncr, Scalar alpha) \
{ \
general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \
rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \
} \
}; \
Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h: 4 changed lines (Normal file → Executable file)
@@ -122,7 +122,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
\
@@ -236,7 +236,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
\
Eigen/src/Core/util/BlasUtil.h: 1 changed line (Normal file → Executable file)
@@ -224,6 +224,7 @@ class blas_data_mapper {
}

const Index stride() const { return m_stride; }
const Scalar* data() const { return m_data; }

Index firstAligned(Index size) const {
if (size_t(m_data)%sizeof(Scalar)) {
@@ -213,6 +213,7 @@ template<typename Scalar> struct scalar_identity_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient2_op;

} // end namespace internal
@@ -645,7 +645,7 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY

#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1800 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@@ -427,7 +427,9 @@ struct special_scalar_op_base : public DenseCoeffsBase<Derived>
{
// dummy operator* so that the
// "using special_scalar_op_base::operator*" compiles
void operator*() const;
struct dummy {};
void operator*(dummy) const;
void operator/(dummy) const;
};

template<typename Derived,typename Scalar,typename OtherScalar>
@@ -451,6 +453,16 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,true> : public DenseCo
#endif
return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar);
}

const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
operator/(const OtherScalar& scalar) const
{
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif
return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
(*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar));
}
};

template<typename XprType, typename CastType> struct cast_return_type
@@ -486,10 +486,11 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag

while (end>0)
{
EIGEN_ASM_COMMENT("beginabs");
for (Index i = start; i<end; ++i)
if (internal::isMuchSmallerThan(abs(subdiag[i]),(abs(diag[i])+abs(diag[i+1]))) || abs(subdiag[i]) <= considerAsZero)
subdiag[i] = 0;

EIGEN_ASM_COMMENT("endabs");
// find the largest unreduced block
while (end>0 && subdiag[end-1]==0)
{
@@ -464,9 +464,10 @@ struct tridiagonalization_inplace_selector<MatrixType,3,false>
static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
{
using std::sqrt;
const RealScalar tol = (std::numeric_limits<RealScalar>::min)();
diag[0] = mat(0,0);
RealScalar v1norm2 = numext::abs2(mat(2,0));
if(v1norm2 == RealScalar(0))
if(v1norm2 <= tol)
{
diag[1] = mat(1,1);
diag[2] = mat(2,2);
@@ -18,6 +18,10 @@ namespace Eigen {
* \returns the cross product of \c *this and \a other
*
* Here is a very good explanation of cross-product: http://xkcd.com/199/
*
* With complex numbers, the cross product is implemented as
* \f$ (\mathbf{a}+i\mathbf{b}) \times (\mathbf{c}+i\mathbf{d}) = (\mathbf{a} \times \mathbf{c} - \mathbf{b} \times \mathbf{d}) - i(\mathbf{a} \times \mathbf{d} - \mathbf{b} \times \mathbf{c})\f$
*
* \sa MatrixBase::cross3()
*/
template<typename Derived>
@@ -75,8 +75,9 @@ void MatrixBase<Derived>::makeHouseholder(

RealScalar tailSqNorm = size()==1 ? RealScalar(0) : tail.squaredNorm();
Scalar c0 = coeff(0);
const RealScalar tol = (std::numeric_limits<RealScalar>::min)();

if(tailSqNorm == RealScalar(0) && numext::imag(c0)==RealScalar(0))
if(tailSqNorm <= tol && numext::abs2(numext::imag(c0))<=tol)
{
tau = RealScalar(0);
beta = numext::real(c0);
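Both this hunk and the tridiagonalization hunk above replace exact comparisons against zero with a threshold of std::numeric_limits<RealScalar>::min(), so that squared norms that underflow into the subnormal range are also treated as zero. A small standalone illustration of why the threshold matters (illustration only, not from the commit):

#include <cstdio>
#include <limits>

int main() {
  const double x   = 1e-160;
  const double sq  = x * x;  // 1e-320: underflows into the subnormal range, but is not exactly 0
  const double tol = std::numeric_limits<double>::min();  // smallest normalized double, ~2.2e-308
  std::printf("sq = %g, sq == 0: %d, sq <= tol: %d\n", sq, sq == 0.0, sq <= tol);
  return 0;
}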
@@ -136,6 +136,12 @@ struct traits<BiCGSTAB<_MatrixType,_Preconditioner> >
* and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
* and NumTraits<Scalar>::epsilon() for the tolerance.
*
* The tolerance corresponds to the relative residual error: |Ax-b|/|b|
*
* \b Performance: when using sparse matrices, best performance is achied for a row-major sparse matrix format.
* Moreover, in this case multi-threading can be exploited if the user code is compiled with OpenMP enabled.
* See \ref TopicMultiThreading for details.
*
* This class can be used as the direct solver classes. Here is a typical usage example:
* \include BiCGSTAB_simple.cpp
*
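BiCGSTAB_simple.cpp itself is not part of this diff. The following is a hedged usage sketch consistent with the documentation above: row-major sparse storage for best performance and the default DiagonalPreconditioner; the diagonally dominant tridiagonal system is purely illustrative.

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>

int main() {
  const int n = 100;
  Eigen::SparseMatrix<double, Eigen::RowMajor> A(n, n);  // row-major, per the performance note
  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);
  for (int i = 0; i < n; ++i) {
    if (i > 0)     A.insert(i, i - 1) = -1.0;
    A.insert(i, i) = 4.0;
    if (i + 1 < n) A.insert(i, i + 1) = -1.0;
  }
  A.makeCompressed();

  Eigen::BiCGSTAB<Eigen::SparseMatrix<double, Eigen::RowMajor> > solver;
  solver.setTolerance(1e-10);   // upper bound on the relative residual |Ax-b|/|b|
  solver.compute(A);
  Eigen::VectorXd x = solver.solve(b);
  std::cout << "#iterations: " << solver.iterations()
            << "  estimated error: " << solver.error() << std::endl;
  return 0;
}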
@@ -114,20 +114,28 @@ struct traits<ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> >
*
* \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix.
* \tparam _UpLo the triangular part that will be used for the computations. It can be Lower,
* Upper, or Lower|Upper in which the full matrix entries will be considered. Default is Lower.
* \c Upper, or \c Lower|Upper in which the full matrix entries will be considered.
* Default is \c Lower, best performance is \c Lower|Upper.
* \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
*
* The maximal number of iterations and tolerance value can be controlled via the setMaxIterations()
* and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations
* and NumTraits<Scalar>::epsilon() for the tolerance.
*
* The tolerance corresponds to the relative residual error: |Ax-b|/|b|
*
* \b Performance: Even though the default value of \c _UpLo is \c Lower, significantly higher performance is
* achieved when using a complete matrix and \b Lower|Upper as the \a _UpLo template parameter. Moreover, in this
* case multi-threading can be exploited if the user code is compiled with OpenMP enabled.
* See \ref TopicMultiThreading for details.
*
* This class can be used as the direct solver classes. Here is a typical usage example:
\code
int n = 10000;
VectorXd x(n), b(n);
SparseMatrix<double> A(n,n);
// fill A and b
ConjugateGradient<SparseMatrix<double> > cg;
ConjugateGradient<SparseMatrix<double>, Lower|Upper> cg;
cg.compute(A);
x = cg.solve(b);
std::cout << "#iterations: " << cg.iterations() << std::endl;
@@ -183,10 +191,13 @@ public:
template<typename Rhs,typename Dest>
void _solve_with_guess_impl(const Rhs& b, Dest& x) const
{
typedef Ref<const MatrixType> MatRef;
typedef typename internal::conditional<UpLo==(Lower|Upper) && (!MatrixType::IsRowMajor) && (!NumTraits<Scalar>::IsComplex),
Transpose<const MatRef>, MatRef const&>::type RowMajorWrapper;
typedef typename internal::conditional<UpLo==(Lower|Upper),
Ref<const MatrixType>&,
typename Ref<const MatrixType>::template ConstSelfAdjointViewReturnType<UpLo>::Type
>::type MatrixWrapperType;
RowMajorWrapper,
typename MatRef::template ConstSelfAdjointViewReturnType<UpLo>::Type
>::type SelfAdjointWrapper;
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;

@@ -196,7 +207,8 @@ public:
m_error = Base::m_tolerance;

typename Dest::ColXpr xj(x,j);
internal::conjugate_gradient(MatrixWrapperType(mp_matrix), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error);
RowMajorWrapper row_mat(mp_matrix);
internal::conjugate_gradient(SelfAdjointWrapper(row_mat), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error);
}

m_isInitialized = true;
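Expanding the updated usage example from the documentation above into a complete, compilable program; the symmetric positive definite tridiagonal matrix is illustrative only.

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>

int main() {
  const int n = 100;
  Eigen::SparseMatrix<double> A(n, n);  // SPD tridiagonal system, for illustration
  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);
  for (int i = 0; i < n; ++i) {
    A.insert(i, i) = 2.0;
    if (i + 1 < n) { A.insert(i + 1, i) = -1.0; A.insert(i, i + 1) = -1.0; }
  }
  A.makeCompressed();

  // Lower|Upper uses the full matrix, the fastest option per the performance note above.
  Eigen::ConjugateGradient<Eigen::SparseMatrix<double>, Eigen::Lower | Eigen::Upper> cg;
  cg.setTolerance(1e-12);   // upper bound on the relative residual |Ax-b|/|b|
  cg.compute(A);
  Eigen::VectorXd x = cg.solve(b);
  std::cout << "#iterations: " << cg.iterations()
            << "  estimated error: " << cg.error() << std::endl;
  return 0;
}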
@@ -126,10 +126,16 @@ public:
/** \internal */
Index cols() const { return mp_matrix.cols(); }

/** \returns the tolerance threshold used by the stopping criteria */
/** \returns the tolerance threshold used by the stopping criteria.
* \sa setTolerance()
*/
RealScalar tolerance() const { return m_tolerance; }

/** Sets the tolerance threshold used by the stopping criteria */
/** Sets the tolerance threshold used by the stopping criteria.
*
* This value is used as an upper bound to the relative residual error: |Ax-b|/|b|.
* The default value is the machine precision given by NumTraits<Scalar>::epsilon()
*/
Derived& setTolerance(const RealScalar& tolerance)
{
m_tolerance = tolerance;
@@ -167,7 +173,9 @@ public:
return m_iterations;
}

/** \returns the tolerance error reached during the last solve */
/** \returns the tolerance error reached during the last solve.
* It is a close approximation of the true relative residual error |Ax-b|/|b|.
*/
RealScalar error() const
{
eigen_assert(m_isInitialized && "ConjugateGradient is not initialized.");
Eigen/src/PardisoSupport/PardisoSupport.h: 49 changed lines (Normal file → Executable file)
@@ -54,7 +54,7 @@ namespace internal
template<>
struct pardiso_run_selector<long long int>
{
typedef long long int IndexTypeType;
typedef long long int IndexType;
static IndexType run( _MKL_DSS_HANDLE_t pt, IndexType maxfct, IndexType mnum, IndexType type, IndexType phase, IndexType n, void *a,
IndexType *ia, IndexType *ja, IndexType *perm, IndexType nrhs, IndexType *iparm, IndexType msglvl, void *b, void *x)
{
@@ -93,19 +93,19 @@ namespace internal
typedef typename _MatrixType::StorageIndex StorageIndex;
};

}
} // end namespace internal

template<class Derived>
class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>
class PardisoImpl : public SparseSolverBase<Derived>
{
protected:
typedef SparseSolveBase<PardisoImpl<Derived> Base;
typedef SparseSolverBase<Derived> Base;
using Base::derived;
using Base::m_isInitialized;

typedef internal::pardiso_traits<Derived> Traits;
public:
using base::_solve_impl;
using Base::_solve_impl;

typedef typename Traits::MatrixType MatrixType;
typedef typename Traits::Scalar Scalar;
@@ -173,16 +173,17 @@ class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>

Derived& compute(const MatrixType& matrix);

template<typename BDerived, typename XDerived>
bool _solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;
template<typename Rhs,typename Dest>
void _solve_impl(const MatrixBase<Rhs> &b, MatrixBase<Dest> &dest) const;

protected:
void pardisoRelease()
{
if(m_isInitialized) // Factorization ran at least once
{
internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, m_size, 0, 0, 0, m_perm.data(), 0,
m_iparm.data(), m_msglvl, 0, 0);
internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0,
m_iparm.data(), m_msglvl, NULL, NULL);
m_isInitialized = false;
}
}

@@ -217,12 +218,14 @@ class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>
m_iparm[27] = (sizeof(RealScalar) == 4) ? 1 : 0;
m_iparm[34] = 1; // C indexing
m_iparm[59] = 1; // Automatic switch between In-Core and Out-of-Core modes

memset(m_pt, 0, sizeof(m_pt));
}

protected:
// cached data to reduce reallocation, etc.

void manageErrorCode(Index error)
void manageErrorCode(Index error) const
{
switch(error)
{
@@ -239,7 +242,7 @@ class PardisoImpl : public SparseSolveBase<PardisoImpl<Derived>
}

mutable SparseMatrixType m_matrix;
ComputationInfo m_info;
mutable ComputationInfo m_info;
bool m_analysisIsOk, m_factorizationIsOk;
Index m_type, m_msglvl;
mutable void *m_pt[64];
@@ -256,7 +259,6 @@ Derived& PardisoImpl<Derived>::compute(const MatrixType& a)
eigen_assert(a.rows() == a.cols());

pardisoRelease();
memset(m_pt, 0, sizeof(m_pt));
m_perm.setZero(m_size);
derived().getMatrix(a);

@@ -279,7 +281,6 @@ Derived& PardisoImpl<Derived>::analyzePattern(const MatrixType& a)
eigen_assert(m_size == a.cols());

pardisoRelease();
memset(m_pt, 0, sizeof(m_pt));
m_perm.setZero(m_size);
derived().getMatrix(a);

@@ -313,12 +314,15 @@ Derived& PardisoImpl<Derived>::factorize(const MatrixType& a)
return derived();
}

template<class Base>
template<class Derived>
template<typename BDerived,typename XDerived>
bool PardisoImpl<Base>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const
void PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const
{
if(m_iparm[0] == 0) // Factorization was not computed
return false;
{
m_info = InvalidInput;
return;
}

//Index n = m_matrix.rows();
Index nrhs = Index(b.cols());
@@ -353,7 +357,7 @@ bool PardisoImpl<Base>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase<XD
m_perm.data(), nrhs, m_iparm.data(), m_msglvl,
rhs_ptr, x.derived().data());

return error==0;
manageErrorCode(error);
}


@@ -373,7 +377,7 @@ template<typename MatrixType>
class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
{
protected:
typedef PardisoImpl< PardisoLU<MatrixType> > Base;
typedef PardisoImpl<PardisoLU> Base;
typedef typename Base::Scalar Scalar;
typedef typename Base::RealScalar RealScalar;
using Base::pardisoInit;
@@ -401,6 +405,7 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
void getMatrix(const MatrixType& matrix)
{
m_matrix = matrix;
m_matrix.makeCompressed();
}
};

@@ -424,7 +429,6 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
protected:
typedef PardisoImpl< PardisoLLT<MatrixType,_UpLo> > Base;
typedef typename Base::Scalar Scalar;
typedef typename Base::StorageIndex StorageIndex;
typedef typename Base::RealScalar RealScalar;
using Base::pardisoInit;
using Base::m_matrix;
@@ -432,9 +436,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >

public:

typedef typename Base::StorageIndex StorageIndex;
enum { UpLo = _UpLo };
using Base::compute;
using Base::solve;

PardisoLLT()
: Base()
@@ -457,6 +461,7 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
PermutationMatrix<Dynamic,Dynamic,StorageIndex> p_null;
m_matrix.resize(matrix.rows(), matrix.cols());
|
||||
m_matrix.template selfadjointView<Upper>() = matrix.template selfadjointView<UpLo>().twistedBy(p_null);
|
||||
m_matrix.makeCompressed();
|
||||
}
|
||||
};
|
||||
|
||||
@ -482,7 +487,6 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
|
||||
protected:
|
||||
typedef PardisoImpl< PardisoLDLT<MatrixType,Options> > Base;
|
||||
typedef typename Base::Scalar Scalar;
|
||||
typedef typename Base::StorageIndex StorageIndex;
|
||||
typedef typename Base::RealScalar RealScalar;
|
||||
using Base::pardisoInit;
|
||||
using Base::m_matrix;
|
||||
@ -490,8 +494,8 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
|
||||
|
||||
public:
|
||||
|
||||
typedef typename Base::StorageIndex StorageIndex;
|
||||
using Base::compute;
|
||||
using Base::solve;
|
||||
enum { UpLo = Options&(Upper|Lower) };
|
||||
|
||||
PardisoLDLT()
|
||||
@ -513,6 +517,7 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT<MatrixType,Options> >
|
||||
PermutationMatrix<Dynamic,Dynamic,StorageIndex> p_null;
|
||||
m_matrix.resize(matrix.rows(), matrix.cols());
|
||||
m_matrix.template selfadjointView<Upper>() = matrix.template selfadjointView<UpLo>().twistedBy(p_null);
|
||||
m_matrix.makeCompressed();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -50,9 +50,9 @@ class CompressedStorage
|
||||
|
||||
CompressedStorage& operator=(const CompressedStorage& other)
|
||||
{
|
||||
resize(other.size());
|
||||
if(other.size()>0)
|
||||
{
|
||||
resize(other.size());
|
||||
internal::smart_copy(other.m_values, other.m_values + m_size, m_values);
|
||||
internal::smart_copy(other.m_indices, other.m_indices + m_size, m_indices);
|
||||
}
|
||||
|
@ -16,8 +16,7 @@ template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
Derived& SparseMatrixBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
// TODO use the evaluator mechanism
|
||||
other.derived().evalTo(derived());
|
||||
internal::call_assignment_no_alias(derived(), other.derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
@ -182,6 +181,39 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
|
||||
}
|
||||
};
|
||||
|
||||
struct Diagonal2Sparse {};
|
||||
|
||||
template<> struct AssignmentKind<SparseShape,DiagonalShape> { typedef Diagonal2Sparse Kind; };
|
||||
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse, Scalar>
|
||||
{
|
||||
typedef typename DstXprType::StorageIndex StorageIndex;
|
||||
typedef Array<StorageIndex,Dynamic,1> ArrayXI;
|
||||
typedef Array<Scalar,Dynamic,1> ArrayXS;
|
||||
template<int Options>
|
||||
static void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
{
|
||||
Index size = src.diagonal().size();
|
||||
dst.makeCompressed();
|
||||
dst.resizeNonZeros(size);
|
||||
Map<ArrayXI>(dst.innerIndexPtr(), size).setLinSpaced(0,StorageIndex(size)-1);
|
||||
Map<ArrayXI>(dst.outerIndexPtr(), size+1).setLinSpaced(0,StorageIndex(size));
|
||||
Map<ArrayXS>(dst.valuePtr(), size) = src.diagonal();
|
||||
}
|
||||
|
||||
template<typename DstDerived>
|
||||
static void run(SparseMatrixBase<DstDerived> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
{
|
||||
dst.diagonal() = src.diagonal();
|
||||
}
|
||||
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
{ dst.diagonal() += src.diagonal(); }
|
||||
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
{ dst.diagonal() -= src.diagonal(); }
|
||||
};
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -390,6 +390,22 @@ SparseMatrixBase<Derived>::operator+=(const SparseMatrixBase<OtherDerived>& othe
return derived() = derived() + other.derived();
}

template<typename Derived>
template<typename OtherDerived>
Derived& SparseMatrixBase<Derived>::operator+=(const DiagonalBase<OtherDerived>& other)
{
call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>());
return derived();
}

template<typename Derived>
template<typename OtherDerived>
Derived& SparseMatrixBase<Derived>::operator-=(const DiagonalBase<OtherDerived>& other)
{
call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>());
return derived();
}
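A minimal sketch of the diagonal-to-sparse paths touched above (construction and assignment from asDiagonal(), plus the new in-place += and -=); the sizes, values, and function name are illustrative:

    #include <Eigen/Sparse>

    Eigen::SparseMatrix<double> diagonal_to_sparse_demo()
    {
      Eigen::VectorXd d = Eigen::VectorXd::LinSpaced(5, 1.0, 5.0);
      Eigen::VectorXd e = Eigen::VectorXd::Constant(5, 0.5);

      Eigen::SparseMatrix<double> S(d.asDiagonal());  // direct construction from a diagonal expression
      S += e.asDiagonal();  // in-place update; assumes S already stores its diagonal, as here
      S -= e.asDiagonal();

      Eigen::SparseMatrix<double> T(5, 5);
      T = d.asDiagonal();   // assignment path (Diagonal2Sparse specialization)
      return S + T;
    }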
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE
|
||||
|
@ -30,23 +30,48 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
|
||||
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
|
||||
typedef typename internal::remove_all<DenseResType>::type Res;
|
||||
typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
|
||||
typedef typename evaluator<Lhs>::type LhsEval;
|
||||
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
|
||||
{
|
||||
typename evaluator<Lhs>::type lhsEval(lhs);
|
||||
LhsEval lhsEval(lhs);
|
||||
|
||||
Index n = lhs.outerSize();
|
||||
#ifdef EIGEN_HAS_OPENMP
|
||||
Eigen::initParallel();
|
||||
Index threads = Eigen::nbThreads();
|
||||
#endif
|
||||
|
||||
for(Index c=0; c<rhs.cols(); ++c)
{
Index n = lhs.outerSize();
for(Index j=0; j<n; ++j)
#ifdef EIGEN_HAS_OPENMP
// This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
// It roughly corresponds to the minimal amount of work for parallelization to be worth it.
if(threads>1 && lhs.nonZeros() > 20000)
{
typename Res::Scalar tmp(0);
for(LhsInnerIterator it(lhsEval,j); it ;++it)
tmp += it.value() * rhs.coeff(it.index(),c);
res.coeffRef(j,c) += alpha * tmp;
#pragma omp parallel for schedule(static) num_threads(threads)
for(Index i=0; i<n; ++i)
processRow(lhsEval,rhs,res,alpha,i,c);
}
else
#endif
{
for(Index i=0; i<n; ++i)
processRow(lhsEval,rhs,res,alpha,i,c);
}
}
}

static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha, Index i, Index col)
{
typename Res::Scalar tmp(0);
for(LhsInnerIterator it(lhsEval,i); it ;++it)
tmp += it.value() * rhs.coeff(it.index(),col);
res.coeffRef(i,col) += alpha * tmp;
}

};
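For context, a sketch of how the parallel path above would be triggered from user code, assuming Eigen was built with OpenMP enabled; the sizes are illustrative and chosen only so the nonzero count exceeds the experimentally chosen 20000 threshold:

    #include <Eigen/Sparse>
    #include <vector>

    void parallel_spmv_demo()
    {
      const int n = 10000;  // ~3n nonzeros, above the 20000 threshold mentioned in the kernel
      std::vector<Eigen::Triplet<double> > trip;
      for (int i = 0; i < n; ++i) {
        trip.push_back(Eigen::Triplet<double>(i, i, 2.0));
        if (i > 0)     trip.push_back(Eigen::Triplet<double>(i, i - 1, -1.0));
        if (i + 1 < n) trip.push_back(Eigen::Triplet<double>(i, i + 1, -1.0));
      }
      Eigen::SparseMatrix<double, Eigen::RowMajor> A(n, n);
      A.setFromTriplets(trip.begin(), trip.end());

      Eigen::setNbThreads(4);  // no effect unless compiled with OpenMP
      Eigen::VectorXd x = Eigen::VectorXd::Random(n), y(n);
      y = A * x;  // takes the parallel row loop when nonZeros() > 20000 and more than one thread is available
    }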
||||
// FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
|
||||
template<typename T1, typename T2/*, int _Options, typename _StrideType*/>
|
||||
struct scalar_product_traits<T1, Ref<T2/*, _Options, _StrideType*/> >
|
||||
{
|
||||
|
@ -97,8 +97,8 @@ class SparseMatrix
|
||||
using Base::isCompressed;
|
||||
using Base::nonZeros;
|
||||
_EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix)
|
||||
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=)
|
||||
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=)
|
||||
using Base::operator+=;
|
||||
using Base::operator-=;
|
||||
|
||||
typedef MappedSparseMatrix<Scalar,Flags> Map;
|
||||
typedef Diagonal<SparseMatrix> DiagonalReturnType;
|
||||
@ -695,6 +695,15 @@ class SparseMatrix
|
||||
initAssignment(other);
|
||||
other.evalTo(*this);
|
||||
}
|
||||
|
||||
/** \brief Copy constructor with in-place evaluation */
|
||||
template<typename OtherDerived>
|
||||
explicit SparseMatrix(const DiagonalBase<OtherDerived>& other)
|
||||
: Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0)
|
||||
{
|
||||
check_template_parameters();
|
||||
*this = other.derived();
|
||||
}
|
||||
|
||||
/** Swaps the content of two sparse matrices of the same type.
|
||||
* This is a fast operation that simply swaps the underlying pointers and parameters. */
|
||||
|
@ -242,6 +242,11 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
|
||||
Derived& operator+=(const SparseMatrixBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
Derived& operator-=(const SparseMatrixBase<OtherDerived>& other);
|
||||
|
||||
template<typename OtherDerived>
|
||||
Derived& operator+=(const DiagonalBase<OtherDerived>& other);
|
||||
template<typename OtherDerived>
|
||||
Derived& operator-=(const DiagonalBase<OtherDerived>& other);
|
||||
|
||||
Derived& operator*=(const Scalar& other);
|
||||
Derived& operator/=(const Scalar& other);
|
||||
@ -367,6 +372,8 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
|
||||
static inline StorageIndex convert_index(const Index idx) {
|
||||
return internal::convert_index<StorageIndex>(idx);
|
||||
}
|
||||
private:
|
||||
template<typename Dest> void evalTo(Dest &) const;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -45,8 +45,13 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
|
||||
{
|
||||
public:
|
||||
|
||||
enum { Mode = _Mode };
|
||||
enum {
|
||||
Mode = _Mode,
|
||||
RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime
|
||||
};
|
||||
|
||||
typedef EigenBase<SparseSelfAdjointView> Base;
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
|
||||
@ -116,20 +121,6 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
|
||||
template<typename DerivedU>
|
||||
SparseSelfAdjointView& rankUpdate(const SparseMatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
|
||||
|
||||
/** \internal triggered by sparse_matrix = SparseSelfadjointView; */
|
||||
template<typename DestScalar,int StorageOrder> void evalTo(SparseMatrix<DestScalar,StorageOrder,StorageIndex>& _dest) const
|
||||
{
|
||||
internal::permute_symm_to_fullsymm<Mode>(m_matrix, _dest);
|
||||
}
|
||||
|
||||
template<typename DestScalar> void evalTo(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& _dest) const
|
||||
{
|
||||
// TODO directly evaluate into _dest;
|
||||
SparseMatrix<DestScalar,ColMajor,StorageIndex> tmp(_dest.rows(),_dest.cols());
|
||||
internal::permute_symm_to_fullsymm<Mode>(m_matrix, tmp);
|
||||
_dest = tmp;
|
||||
}
|
||||
|
||||
/** \returns an expression of P H P^-1 */
// TODO implement twists in a more evaluator friendly fashion
SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix<Dynamic,Dynamic,StorageIndex>& perm) const
@ -140,7 +131,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
template<typename SrcMatrixType,int SrcMode>
SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct<SrcMatrixType,SrcMode>& permutedMatrix)
{
permutedMatrix.evalTo(*this);
internal::call_assignment_no_alias_no_transpose(*this, permutedMatrix);
return *this;
}
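A small sketch of the permutation path this hunk rewires (a selfadjoint view permuted by P and written either into a full sparse matrix or into another selfadjoint destination); the matrix and permutation contents are illustrative, and the destination is pre-sized because the selfadjoint view cannot be resized:

    #include <Eigen/Sparse>

    void twisted_by_demo(const Eigen::SparseMatrix<double>& A,  // symmetric, lower part stored
                         const Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic, int>& P)
    {
      // Full symmetric result P * A * P^-1.
      Eigen::SparseMatrix<double> fullPAPinv;
      fullPAPinv = A.selfadjointView<Eigen::Lower>().twistedBy(P);

      // Keep only one triangle of the permuted matrix in the destination.
      Eigen::SparseMatrix<double> halfPAPinv(A.rows(), A.cols());
      halfPAPinv.selfadjointView<Eigen::Upper>() = A.selfadjointView<Eigen::Lower>().twistedBy(P);
    }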
@ -157,11 +148,21 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
|
||||
return *this = src.twistedBy(pnull);
|
||||
}
|
||||
|
||||
void resize(Index rows, Index cols)
|
||||
{
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(rows);
|
||||
EIGEN_ONLY_USED_FOR_DEBUG(cols);
|
||||
eigen_assert(rows == this->rows() && cols == this->cols()
|
||||
&& "SparseSelfadjointView::resize() does not actually allow to resize.");
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
typename MatrixType::Nested m_matrix;
|
||||
//mutable VectorI m_countPerRow;
|
||||
//mutable VectorI m_countPerCol;
|
||||
private:
|
||||
template<typename Dest> void evalTo(Dest &) const;
|
||||
};
|
||||
|
||||
/***************************************************************************
|
||||
@ -200,6 +201,47 @@ SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<Derive
|
||||
return *this;
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
|
||||
// in the future selfadjoint-ness should be defined by the expression traits
|
||||
// such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
|
||||
typedef SparseSelfAdjointShape Shape;
|
||||
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
struct SparseSelfAdjoint2Sparse {};
|
||||
|
||||
template<> struct AssignmentKind<SparseShape,SparseSelfAdjointShape> { typedef SparseSelfAdjoint2Sparse Kind; };
|
||||
template<> struct AssignmentKind<SparseSelfAdjointShape,SparseShape> { typedef Sparse2Sparse Kind; };
|
||||
|
||||
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
|
||||
struct Assignment<DstXprType, SrcXprType, Functor, SparseSelfAdjoint2Sparse, Scalar>
|
||||
{
|
||||
typedef typename DstXprType::StorageIndex StorageIndex;
|
||||
template<typename DestScalar,int StorageOrder>
|
||||
static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
{
|
||||
internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);
|
||||
}
|
||||
|
||||
template<typename DestScalar>
|
||||
static void run(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
|
||||
{
|
||||
// TODO directly evaluate into dst;
|
||||
SparseMatrix<DestScalar,ColMajor,StorageIndex> tmp(dst.rows(),dst.cols());
|
||||
internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), tmp);
|
||||
dst = tmp;
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of sparse self-adjoint time dense matrix
|
||||
***************************************************************************/
|
||||
@ -252,18 +294,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
|
||||
res.row(j) += i.value() * rhs.row(j);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
|
||||
// in the future selfadjoint-ness should be defined by the expression traits
|
||||
// such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
|
||||
template<typename MatrixType, unsigned int Mode>
|
||||
struct evaluator_traits<SparseSelfAdjointView<MatrixType,Mode> >
|
||||
{
|
||||
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
|
||||
typedef SparseSelfAdjointShape Shape;
|
||||
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
|
||||
template<typename LhsView, typename Rhs, int ProductType>
|
||||
struct generic_product_impl<LhsView, Rhs, SparseSelfAdjointShape, DenseShape, ProductType>
|
||||
@ -519,12 +550,16 @@ class SparseSymmetricPermutationProduct
|
||||
public:
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::StorageIndex StorageIndex;
|
||||
enum {
|
||||
RowsAtCompileTime = internal::traits<SparseSymmetricPermutationProduct>::RowsAtCompileTime,
|
||||
ColsAtCompileTime = internal::traits<SparseSymmetricPermutationProduct>::ColsAtCompileTime
|
||||
};
|
||||
protected:
|
||||
typedef PermutationMatrix<Dynamic,Dynamic,StorageIndex> Perm;
|
||||
public:
|
||||
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
|
||||
typedef typename MatrixType::Nested MatrixTypeNested;
|
||||
typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
|
||||
typedef typename internal::remove_all<MatrixTypeNested>::type NestedExpression;
|
||||
|
||||
SparseSymmetricPermutationProduct(const MatrixType& mat, const Perm& perm)
|
||||
: m_matrix(mat), m_perm(perm)
|
||||
@ -532,20 +567,9 @@ class SparseSymmetricPermutationProduct
|
||||
|
||||
inline Index rows() const { return m_matrix.rows(); }
|
||||
inline Index cols() const { return m_matrix.cols(); }
|
||||
|
||||
template<typename DestScalar, int Options, typename DstIndex>
|
||||
void evalTo(SparseMatrix<DestScalar,Options,DstIndex>& _dest) const
|
||||
{
|
||||
// internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
|
||||
SparseMatrix<DestScalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
|
||||
internal::permute_symm_to_fullsymm<Mode>(m_matrix,tmp,m_perm.indices().data());
|
||||
_dest = tmp;
|
||||
}
|
||||
|
||||
template<typename DestType,unsigned int DestMode> void evalTo(SparseSelfAdjointView<DestType,DestMode>& dest) const
|
||||
{
|
||||
internal::permute_symm_to_symm<Mode,DestMode>(m_matrix,dest.matrix(),m_perm.indices().data());
|
||||
}
|
||||
|
||||
const NestedExpression& matrix() const { return m_matrix; }
|
||||
const Perm& perm() const { return m_perm; }
|
||||
|
||||
protected:
|
||||
MatrixTypeNested m_matrix;
|
||||
@ -553,6 +577,31 @@ class SparseSymmetricPermutationProduct
|
||||
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename DstXprType, typename MatrixType, int Mode, typename Scalar>
|
||||
struct Assignment<DstXprType, SparseSymmetricPermutationProduct<MatrixType,Mode>, internal::assign_op<Scalar>, Sparse2Sparse>
|
||||
{
|
||||
typedef SparseSymmetricPermutationProduct<MatrixType,Mode> SrcXprType;
|
||||
typedef typename DstXprType::StorageIndex DstIndex;
|
||||
template<int Options>
|
||||
static void run(SparseMatrix<Scalar,Options,DstIndex> &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||
{
|
||||
// internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
|
||||
SparseMatrix<Scalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
|
||||
internal::permute_symm_to_fullsymm<Mode>(src.matrix(),tmp,src.perm().indices().data());
|
||||
dst = tmp;
|
||||
}
|
||||
|
||||
template<typename DestType,unsigned int DestMode>
|
||||
static void run(SparseSelfAdjointView<DestType,DestMode>& dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
|
||||
{
|
||||
internal::permute_symm_to_symm<Mode,DestMode>(src.matrix(),dst.matrix(),src.perm().indices().data());
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SPARSE_SELFADJOINTVIEW_H
|
||||
|
@ -37,11 +37,7 @@ EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \
|
||||
}
|
||||
|
||||
#define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
|
||||
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) \
|
||||
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, +=) \
|
||||
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, -=) \
|
||||
EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, *=) \
|
||||
EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=)
|
||||
EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =)
|
||||
|
||||
// TODO this is mostly the same as EIGEN_GENERIC_PUBLIC_INTERFACE
|
||||
#define _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \
|
||||
|
@ -152,8 +152,8 @@ Index SparseLUImpl<Scalar,StorageIndex>::memInit(Index m, Index n, Index annz, I
|
||||
{
|
||||
Index& num_expansions = glu.num_expansions; //No memory expansions so far
|
||||
num_expansions = 0;
|
||||
glu.nzumax = glu.nzlumax = (std::min)(fillratio * annz / n, m) * n; // estimated number of nonzeros in U
|
||||
glu.nzlmax = (std::max)(Index(4), fillratio) * annz / 4; // estimated nnz in L factor
|
||||
glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U
|
||||
glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated nnz in L factor
|
||||
// Return the estimated size to the user if necessary
|
||||
Index tempSpace;
|
||||
tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
|
||||
|
@ -23,6 +23,10 @@ namespace internal {
|
||||
typedef typename SparseQRType::MatrixType ReturnType;
|
||||
typedef typename ReturnType::StorageIndex StorageIndex;
|
||||
typedef typename ReturnType::StorageKind StorageKind;
|
||||
enum {
|
||||
RowsAtCompileTime = Dynamic,
|
||||
ColsAtCompileTime = Dynamic
|
||||
};
|
||||
};
|
||||
template <typename SparseQRType> struct traits<SparseQRMatrixQTransposeReturnType<SparseQRType> >
|
||||
{
|
||||
@ -235,8 +239,9 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
|
||||
return m_info;
|
||||
}
|
||||
|
||||
protected:
|
||||
inline void sort_matrix_Q()
|
||||
|
||||
/** \internal */
|
||||
inline void _sort_matrix_Q()
|
||||
{
|
||||
if(this->m_isQSorted) return;
|
||||
// The matrix Q is sorted during the transposition
|
||||
@ -267,7 +272,6 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
|
||||
bool m_isEtreeOk; // whether the elimination tree match the initial input matrix
|
||||
|
||||
template <typename, typename > friend struct SparseQR_QProduct;
|
||||
template <typename > friend struct SparseQRMatrixQReturnType;
|
||||
|
||||
};
|
||||
|
||||
@ -635,6 +639,10 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
|
||||
{
|
||||
typedef typename SparseQRType::Scalar Scalar;
|
||||
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
|
||||
enum {
|
||||
RowsAtCompileTime = Dynamic,
|
||||
ColsAtCompileTime = Dynamic
|
||||
};
|
||||
explicit SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {}
|
||||
template<typename Derived>
|
||||
SparseQR_QProduct<SparseQRType, Derived> operator*(const MatrixBase<Derived>& other)
|
||||
@ -652,19 +660,6 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
|
||||
{
|
||||
return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
|
||||
}
|
||||
template<typename Dest> void evalTo(MatrixBase<Dest>& dest) const
|
||||
{
|
||||
dest.derived() = m_qr.matrixQ() * Dest::Identity(m_qr.rows(), m_qr.rows());
|
||||
}
|
||||
template<typename Dest> void evalTo(SparseMatrixBase<Dest>& dest) const
|
||||
{
|
||||
Dest idMat(m_qr.rows(), m_qr.rows());
|
||||
idMat.setIdentity();
|
||||
// Sort the sparse householder reflectors if needed
|
||||
const_cast<SparseQRType *>(&m_qr)->sort_matrix_Q();
|
||||
dest.derived() = SparseQR_QProduct<SparseQRType, Dest>(m_qr, idMat, false);
|
||||
}
|
||||
|
||||
const SparseQRType& m_qr;
|
||||
};
|
||||
|
||||
@ -680,6 +675,47 @@ struct SparseQRMatrixQTransposeReturnType
|
||||
const SparseQRType& m_qr;
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename SparseQRType>
|
||||
struct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> >
|
||||
{
|
||||
typedef typename SparseQRType::MatrixType MatrixType;
|
||||
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
|
||||
typedef SparseShape Shape;
|
||||
static const int AssumeAliasing = 0;
|
||||
};
|
||||
|
||||
template< typename DstXprType, typename SparseQRType>
|
||||
struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar>, Sparse2Sparse>
|
||||
{
|
||||
typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
typedef typename DstXprType::StorageIndex StorageIndex;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &/*func*/)
|
||||
{
|
||||
typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows());
|
||||
idMat.setIdentity();
|
||||
// Sort the sparse householder reflectors if needed
|
||||
const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q();
|
||||
dst = SparseQR_QProduct<SparseQRType, DstXprType>(src.m_qr, idMat, false);
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DstXprType, typename SparseQRType>
|
||||
struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar>, Sparse2Dense>
|
||||
{
|
||||
typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;
|
||||
typedef typename DstXprType::Scalar Scalar;
|
||||
typedef typename DstXprType::StorageIndex StorageIndex;
|
||||
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &/*func*/)
|
||||
{
|
||||
dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows());
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif
|
||||
|
@ -26,6 +26,8 @@ function(EigenDetermineVSServicePack _pack)
|
||||
set(_sp "vc110sp2")
|
||||
elseif(${_cl_version} VERSION_EQUAL "17.00.60610.1")
|
||||
set(_sp "vc110sp3")
|
||||
else()
|
||||
set(_sp ${CMAKE_CXX_COMPILER_VERSION})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -367,7 +367,7 @@ macro(ei_get_compilerver VAR)
|
||||
# on all other system we rely on ${CMAKE_CXX_COMPILER}
|
||||
# supporting a "--version" or "/version" flag
|
||||
|
||||
if(WIN32 AND NOT CYGWIN)
|
||||
if(WIN32 AND NOT CYGWIN AND NOT MINGW)
|
||||
set(EIGEN_CXX_FLAG_VERSION "/version")
|
||||
else()
|
||||
set(EIGEN_CXX_FLAG_VERSION "--version")
|
||||
|
@ -13,17 +13,17 @@ The Eigen library is divided in a Core module and several additional modules. Ea
|
||||
|
||||
<table class="manual">
|
||||
<tr><th>Module</th><th>Header file</th><th>Contents</th></tr>
|
||||
<tr><td>\link Core_Module Core \endlink</td><td>\code#include <Eigen/Core>\endcode</td><td>Matrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation</td></tr>
|
||||
<tr ><td>\link Core_Module Core \endlink</td><td>\code#include <Eigen/Core>\endcode</td><td>Matrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation</td></tr>
|
||||
<tr class="alt"><td>\link Geometry_Module Geometry \endlink</td><td>\code#include <Eigen/Geometry>\endcode</td><td>Transform, Translation, Scaling, Rotation2D and 3D rotations (Quaternion, AngleAxis)</td></tr>
|
||||
<tr><td>\link LU_Module LU \endlink</td><td>\code#include <Eigen/LU>\endcode</td><td>Inverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)</td></tr>
|
||||
<tr><td>\link Cholesky_Module Cholesky \endlink</td><td>\code#include <Eigen/Cholesky>\endcode</td><td>LLT and LDLT Cholesky factorization with solver</td></tr>
|
||||
<tr class="alt"><td>\link Householder_Module Householder \endlink</td><td>\code#include <Eigen/Householder>\endcode</td><td>Householder transformations; this module is used by several linear algebra modules</td></tr>
|
||||
<tr><td>\link SVD_Module SVD \endlink</td><td>\code#include <Eigen/SVD>\endcode</td><td>SVD decomposition with least-squares solver (JacobiSVD)</td></tr>
|
||||
<tr class="alt"><td>\link QR_Module QR \endlink</td><td>\code#include <Eigen/QR>\endcode</td><td>QR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)</td></tr>
|
||||
<tr><td>\link Eigenvalues_Module Eigenvalues \endlink</td><td>\code#include <Eigen/Eigenvalues>\endcode</td><td>Eigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)</td></tr>
|
||||
<tr class="alt"><td>\link Sparse_modules Sparse \endlink</td><td>\code#include <Eigen/Sparse>\endcode</td><td>%Sparse matrix storage and related basic linear algebra (SparseMatrix, DynamicSparseMatrix, SparseVector)</td></tr>
|
||||
<tr><td></td><td>\code#include <Eigen/Dense>\endcode</td><td>Includes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files</td></tr>
|
||||
<tr class="alt"><td></td><td>\code#include <Eigen/Eigen>\endcode</td><td>Includes %Dense and %Sparse header files (the whole Eigen library)</td></tr>
|
||||
<tr ><td>\link LU_Module LU \endlink</td><td>\code#include <Eigen/LU>\endcode</td><td>Inverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)</td></tr>
|
||||
<tr class="alt"><td>\link Cholesky_Module Cholesky \endlink</td><td>\code#include <Eigen/Cholesky>\endcode</td><td>LLT and LDLT Cholesky factorization with solver</td></tr>
|
||||
<tr ><td>\link Householder_Module Householder \endlink</td><td>\code#include <Eigen/Householder>\endcode</td><td>Householder transformations; this module is used by several linear algebra modules</td></tr>
|
||||
<tr class="alt"><td>\link SVD_Module SVD \endlink</td><td>\code#include <Eigen/SVD>\endcode</td><td>SVD decompositions with least-squares solver (JacobiSVD, BDCSVD)</td></tr>
|
||||
<tr ><td>\link QR_Module QR \endlink</td><td>\code#include <Eigen/QR>\endcode</td><td>QR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)</td></tr>
|
||||
<tr class="alt"><td>\link Eigenvalues_Module Eigenvalues \endlink</td><td>\code#include <Eigen/Eigenvalues>\endcode</td><td>Eigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)</td></tr>
|
||||
<tr ><td>\link Sparse_modules Sparse \endlink</td><td>\code#include <Eigen/Sparse>\endcode</td><td>%Sparse matrix storage and related basic linear algebra (SparseMatrix, SparseVector) \n (see \ref SparseQuickRefPage for details on sparse modules)</td></tr>
|
||||
<tr class="alt"><td></td><td>\code#include <Eigen/Dense>\endcode</td><td>Includes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files</td></tr>
|
||||
<tr ><td></td><td>\code#include <Eigen/Eigen>\endcode</td><td>Includes %Dense and %Sparse header files (the whole Eigen library)</td></tr>
|
||||
</table>
|
||||
|
||||
<a href="#" class="top">top</a>
|
||||
@ -364,32 +364,10 @@ vec3 = vec1.cross(vec2);\endcode</td></tr>
|
||||
|
||||
<a href="#" class="top">top</a>
|
||||
\section QuickRef_Coeffwise Coefficient-wise \& Array operators
|
||||
Coefficient-wise operators for matrices and vectors:
|
||||
<table class="manual">
|
||||
<tr><th>Matrix API \matrixworld</th><th>Via Array conversions</th></tr>
|
||||
<tr><td>\code
|
||||
mat1.cwiseMin(mat2)
|
||||
mat1.cwiseMax(mat2)
|
||||
mat1.cwiseAbs2()
|
||||
mat1.cwiseAbs()
|
||||
mat1.cwiseSqrt()
|
||||
mat1.cwiseProduct(mat2)
|
||||
mat1.cwiseQuotient(mat2)\endcode
|
||||
</td><td>\code
|
||||
mat1.array().min(mat2.array())
|
||||
mat1.array().max(mat2.array())
|
||||
mat1.array().abs2()
|
||||
mat1.array().abs()
|
||||
mat1.array().sqrt()
|
||||
mat1.array() * mat2.array()
|
||||
mat1.array() / mat2.array()
|
||||
\endcode</td></tr>
|
||||
</table>
|
||||
|
||||
It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with std::ptr_fun:
|
||||
\code mat1.unaryExpr(std::ptr_fun(foo))\endcode
|
||||
|
||||
Array operators:\arrayworld
In addition to the aforementioned operators, Eigen supports numerous coefficient-wise operators and functions.
Most of them unambiguously make sense in array-world\arrayworld. The following operators are readily available for arrays,
or available through .array() for vectors and matrices:
|
||||
|
||||
<table class="manual">
|
||||
<tr><td>Arithmetic operators</td><td>\code
|
||||
@ -400,28 +378,107 @@ array1 + scalar array1 - scalar array1 += scalar array1 -= scalar
|
||||
array1 < array2 array1 > array2 array1 < scalar array1 > scalar
|
||||
array1 <= array2 array1 >= array2 array1 <= scalar array1 >= scalar
|
||||
array1 == array2 array1 != array2 array1 == scalar array1 != scalar
|
||||
array1.min(array2) array1.max(array2) array1.min(scalar) array1.max(scalar)
|
||||
\endcode</td></tr>
|
||||
<tr><td>Trigo, power, and \n misc functions \n and the STL variants</td><td>\code
|
||||
array1.min(array2)
|
||||
array1.max(array2)
|
||||
<tr><td>Trigo, power, and \n misc functions \n and the STL-like variants</td><td>\code
|
||||
array1.abs2()
|
||||
array1.abs() abs(array1)
|
||||
array1.sqrt() sqrt(array1)
|
||||
array1.log() log(array1)
|
||||
array1.log10() log10(array1)
|
||||
array1.exp() exp(array1)
|
||||
array1.pow(exponent) pow(array1,exponent)
|
||||
array1.pow(array2) pow(array1,array2)
|
||||
array1.pow(scalar) pow(array1,scalar)
|
||||
array1.square()
|
||||
array1.cube()
|
||||
array1.inverse()
|
||||
|
||||
array1.sin() sin(array1)
|
||||
array1.cos() cos(array1)
|
||||
array1.tan() tan(array1)
|
||||
array1.asin() asin(array1)
|
||||
array1.acos() acos(array1)
|
||||
array1.atan() atan(array1)
|
||||
array1.sinh() sinh(array1)
|
||||
array1.cosh() cosh(array1)
|
||||
array1.tanh() tanh(array1)
|
||||
array1.arg() arg(array1)
|
||||
|
||||
array1.floor() floor(array1)
|
||||
array1.ceil() ceil(array1)
|
||||
array1.round()             round(array1)
|
||||
|
||||
array1.isFinite() isfinite(array1)
|
||||
array1.isInf() isinf(array1)
|
||||
array1.isNaN() isnan(array1)
|
||||
\endcode
|
||||
</td></tr>
|
||||
</table>
|
||||
|
||||
|
||||
The following coefficient-wise operators are available for all kinds of expressions (matrices, vectors, and arrays), and for both real and complex scalar types:
|
||||
|
||||
<table class="manual">
|
||||
<tr><th>Eigen's API</th><th>STL-like APIs\arrayworld </th><th>Comments</th></tr>
|
||||
<tr><td>\code
|
||||
mat1.real()
|
||||
mat1.imag()
|
||||
mat1.conjugate()
|
||||
\endcode
|
||||
</td><td>\code
|
||||
real(array1)
|
||||
imag(array1)
|
||||
conj(array1)
|
||||
\endcode
|
||||
</td><td>
|
||||
\code
|
||||
// read-write, no-op for real expressions
|
||||
// read-only for real, read-write for complexes
|
||||
// no-op for real expressions
|
||||
\endcode
|
||||
</td></tr>
|
||||
</table>
|
||||
|
||||
Some coefficient-wise operators are readily available for matrices and vectors through the following cwise* methods:
|
||||
<table class="manual">
|
||||
<tr><th>Matrix API \matrixworld</th><th>Via Array conversions</th></tr>
|
||||
<tr><td>\code
|
||||
mat1.cwiseMin(mat2) mat1.cwiseMin(scalar)
|
||||
mat1.cwiseMax(mat2) mat1.cwiseMax(scalar)
|
||||
mat1.cwiseAbs2()
|
||||
mat1.cwiseAbs()
|
||||
mat1.cwiseSqrt()
|
||||
mat1.cwiseInverse()
|
||||
mat1.cwiseProduct(mat2)
|
||||
mat1.cwiseQuotient(mat2)
|
||||
mat1.cwiseEqual(mat2) mat1.cwiseEqual(scalar)
|
||||
mat1.cwiseNotEqual(mat2)
|
||||
\endcode
|
||||
</td><td>\code
|
||||
mat1.array().min(mat2.array()) mat1.array().min(scalar)
|
||||
mat1.array().max(mat2.array()) mat1.array().max(scalar)
|
||||
mat1.array().abs2()
|
||||
mat1.array().abs()
|
||||
mat1.array().sqrt()
|
||||
mat1.array().inverse()
|
||||
mat1.array() * mat2.array()
|
||||
mat1.array() / mat2.array()
|
||||
mat1.array() == mat2.array() mat1.array() == scalar
|
||||
mat1.array() != mat2.array()
|
||||
\endcode</td></tr>
|
||||
</table>
|
||||
The main difference between the two APIs is that the one based on cwise* methods returns an expression in the matrix world,
while the second one (based on .array()) returns an array expression.
Recall that .array() has no runtime cost; it only changes the available API and the interpretation of the data.
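To illustrate the distinction in the paragraph above, both lines below compute the same coefficient-wise product, but only the second needs an explicit .matrix() to re-enter the matrix world before the matrix-vector product; the operand names and sizes are illustrative:

    #include <Eigen/Dense>

    void cwise_vs_array_demo(const Eigen::MatrixXd& mat1, const Eigen::MatrixXd& mat2,
                             const Eigen::VectorXd& vec)
    {
      Eigen::VectorXd y1 = mat1.cwiseProduct(mat2) * vec;                 // stays a matrix expression
      Eigen::VectorXd y2 = (mat1.array() * mat2.array()).matrix() * vec;  // array expression converted back
    }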
|
||||
|
||||
It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with <a href="http://en.cppreference.com/w/cpp/utility/functional/ptr_fun">std::ptr_fun</a> (c++03), <a href="http://en.cppreference.com/w/cpp/utility/functional/ref">std::ref</a> (c++11), or <a href="http://en.cppreference.com/w/cpp/language/lambda">lambdas</a> (c++11):
|
||||
\code
|
||||
mat1.unaryExpr(std::ptr_fun(foo));
|
||||
mat1.unaryExpr(std::ref(foo));
|
||||
mat1.unaryExpr([](double x) { return foo(x); });
|
||||
\endcode
|
||||
|
||||
|
||||
<a href="#" class="top">top</a>
|
||||
\section QuickRef_Reductions Reductions
|
||||
|
||||
|
@ -21,7 +21,7 @@ They are summarized in the following table:
<tr><td>ConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Classic iterative CG</td><td>SPD</td><td>Preconditioning</td>
<td>built-in, MPL2</td>
<td>Recommended for large symmetric problems (e.g., 3D Poisson eq.)</td></tr>
<tr><td>LSCG</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>CG for rectangular least-squares problems</td><td>Rectangular</td><td>Preconditioning</td>
<tr><td>LeastSquaresConjugateGradient</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>CG for rectangular least-squares problems</td><td>Rectangular</td><td>Preconditioning</td>
<td>built-in, MPL2</td>
<td>Solve for min |Ax-b|^2 without forming A'A</td></tr>
<tr><td>BiCGSTAB</td><td>\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink</td><td>Iterative stabilized bi-conjugate gradient</td><td>Square</td><td>Preconditioning</td>
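As a sketch of the renamed least-squares solver in the row above (minimizing |Ax-b|^2 for a rectangular A without forming A'A); the matrix contents and function name are illustrative:

    #include <Eigen/Sparse>

    Eigen::VectorXd least_squares_demo(const Eigen::SparseMatrix<double>& A,  // m x n, typically m >= n
                                       const Eigen::VectorXd& b)              // size m
    {
      Eigen::LeastSquaresConjugateGradient<Eigen::SparseMatrix<double> > lscg;
      lscg.compute(A);
      return lscg.solve(b);  // least-squares solution of A*x = b
    }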
@ -22,8 +22,12 @@ n = Eigen::nbThreads( );
You can disable Eigen's multi-threading at compile time by defining the EIGEN_DONT_PARALLELIZE preprocessor token.

Currently, the following algorithms can make use of multi-threading:
* general matrix - matrix products
* PartialPivLU
- general dense matrix - matrix products
- PartialPivLU
- row-major-sparse * dense vector/matrix products
- ConjugateGradient with \c Lower|Upper as the \c UpLo template parameter.
- BiCGSTAB with a row-major sparse matrix format.
- LeastSquaresConjugateGradient
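A brief sketch of controlling the thread count around one of the listed algorithms (a general dense matrix product), assuming Eigen was compiled with OpenMP support; the sizes and thread count are arbitrary:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::initParallel();    // required if the application itself spawns threads
      Eigen::setNbThreads(4);   // ignored unless Eigen was built with OpenMP
      std::cout << "Eigen uses " << Eigen::nbThreads() << " threads\n";

      Eigen::MatrixXd A = Eigen::MatrixXd::Random(512, 512);
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(512, 512);
      Eigen::MatrixXd C = A * B;  // general matrix-matrix product, parallelized
      std::cout << C.norm() << "\n";
    }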
\section TopicMultiThreading_UsingEigenWithMT Using Eigen in a multi-threaded application
|
||||
|
||||
|
@ -124,8 +124,11 @@ template<typename ArrayType> void comparisons(const ArrayType& m)
|
||||
c = internal::random<Index>(0, cols-1);
|
||||
|
||||
ArrayType m1 = ArrayType::Random(rows, cols),
|
||||
m2 = ArrayType::Random(rows, cols),
|
||||
m3(rows, cols);
|
||||
m2 = ArrayType::Random(rows, cols),
|
||||
m3(rows, cols),
|
||||
m4 = m1;
|
||||
|
||||
m4 = (m4.abs()==Scalar(0)).select(1,m4);
|
||||
|
||||
VERIFY(((m1 + Scalar(1)) > m1).all());
|
||||
VERIFY(((m1 - Scalar(1)) < m1).all());
|
||||
@ -197,7 +200,10 @@ template<typename ArrayType> void array_real(const ArrayType& m)
|
||||
|
||||
ArrayType m1 = ArrayType::Random(rows, cols),
|
||||
m2 = ArrayType::Random(rows, cols),
|
||||
m3(rows, cols);
|
||||
m3(rows, cols),
|
||||
m4 = m1;
|
||||
|
||||
m4 = (m4.abs()==Scalar(0)).select(1,m4);
|
||||
|
||||
Scalar s1 = internal::random<Scalar>();
|
||||
|
||||
@ -215,9 +221,9 @@ template<typename ArrayType> void array_real(const ArrayType& m)
|
||||
VERIFY_IS_APPROX(m1.round(), round(m1));
|
||||
VERIFY_IS_APPROX(m1.floor(), floor(m1));
|
||||
VERIFY_IS_APPROX(m1.ceil(), ceil(m1));
|
||||
VERIFY((m1.isNaN() == isnan(m1)).all());
|
||||
VERIFY((m1.isInf() == isinf(m1)).all());
|
||||
VERIFY((m1.isFinite() == isfinite(m1)).all());
|
||||
VERIFY((m1.isNaN() == Eigen::isnan(m1)).all());
|
||||
VERIFY((m1.isInf() == Eigen::isinf(m1)).all());
|
||||
VERIFY((m1.isFinite() == Eigen::isfinite(m1)).all());
|
||||
VERIFY_IS_APPROX(m1.inverse(), inverse(m1));
|
||||
VERIFY_IS_APPROX(m1.abs(), abs(m1));
|
||||
VERIFY_IS_APPROX(m1.abs2(), abs2(m1));
|
||||
@ -243,9 +249,9 @@ template<typename ArrayType> void array_real(const ArrayType& m)
|
||||
VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
|
||||
VERIFY_IS_APPROX(arg(m1), ((ArrayType)(m1<0))*std::acos(-1.0));
|
||||
VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all());
|
||||
VERIFY(isnan(m1*0.0/0.0).all());
|
||||
VERIFY(isinf(m1/0.0).all());
|
||||
VERIFY((isfinite(m1) && !isfinite(m1*0.0/0.0) && !isfinite(m1/0.0)).all());
|
||||
VERIFY(Eigen::isnan((m1*0.0)/0.0).all());
|
||||
VERIFY(Eigen::isinf(m4/0.0).all());
|
||||
VERIFY((Eigen::isfinite(m1) && (!Eigen::isfinite(m1*0.0/0.0)) && (!Eigen::isfinite(m4/0.0))).all());
|
||||
VERIFY_IS_APPROX(inverse(inverse(m1)),m1);
|
||||
VERIFY((abs(m1) == m1 || abs(m1) == -m1).all());
|
||||
VERIFY_IS_APPROX(m3, sqrt(abs2(m1)));
|
||||
@ -299,7 +305,11 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
|
||||
Index cols = m.cols();
|
||||
|
||||
ArrayType m1 = ArrayType::Random(rows, cols),
|
||||
m2(rows, cols);
|
||||
m2(rows, cols),
|
||||
m4 = m1;
|
||||
|
||||
m4.real() = (m4.real().abs()==RealScalar(0)).select(RealScalar(1),m4.real());
|
||||
m4.imag() = (m4.imag().abs()==RealScalar(0)).select(RealScalar(1),m4.imag());
|
||||
|
||||
Array<RealScalar, -1, -1> m3(rows, cols);
|
||||
|
||||
@ -317,9 +327,9 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
|
||||
VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
|
||||
VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
|
||||
VERIFY_IS_APPROX(m1.arg(), arg(m1));
|
||||
VERIFY((m1.isNaN() == isnan(m1)).all());
|
||||
VERIFY((m1.isInf() == isinf(m1)).all());
|
||||
VERIFY((m1.isFinite() == isfinite(m1)).all());
|
||||
VERIFY((m1.isNaN() == Eigen::isnan(m1)).all());
|
||||
VERIFY((m1.isInf() == Eigen::isinf(m1)).all());
|
||||
VERIFY((m1.isFinite() == Eigen::isfinite(m1)).all());
|
||||
VERIFY_IS_APPROX(m1.inverse(), inverse(m1));
|
||||
VERIFY_IS_APPROX(m1.log(), log(m1));
|
||||
VERIFY_IS_APPROX(m1.log10(), log10(m1));
|
||||
@ -345,20 +355,20 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
|
||||
VERIFY_IS_APPROX(arg(m1), m3);
|
||||
|
||||
std::complex<RealScalar> zero(0.0,0.0);
|
||||
VERIFY(isnan(m1*zero/zero).all());
|
||||
VERIFY(Eigen::isnan(m1*zero/zero).all());
|
||||
#if EIGEN_COMP_CLANG
|
||||
// clang's complex division is notoriously broken
|
||||
if(numext::isinf(m1(0,0)/Scalar(0))) {
|
||||
if(numext::isinf(m4(0,0)/RealScalar(0))) {
|
||||
#endif
|
||||
VERIFY(isinf(m1/zero).all());
|
||||
VERIFY(Eigen::isinf(m4/zero).all());
|
||||
#if EIGEN_COMP_CLANG
|
||||
}
|
||||
else
|
||||
{
|
||||
VERIFY(isinf(m1.real()/zero.real()).all());
|
||||
VERIFY(Eigen::isinf(m4.real()/zero.real()).all());
|
||||
}
|
||||
#endif
|
||||
VERIFY((isfinite(m1) && !isfinite(m1*zero/zero) && !isfinite(m1/zero)).all());
|
||||
VERIFY((Eigen::isfinite(m1) && (!Eigen::isfinite(m1*zero/zero)) && (!Eigen::isfinite(m1/zero))).all());
|
||||
|
||||
VERIFY_IS_APPROX(inverse(inverse(m1)),m1);
|
||||
VERIFY_IS_APPROX(conj(m1.conjugate()), m1);
|
||||
|
@ -223,7 +223,7 @@ void fixedSizeMatrixConstruction()
|
||||
for(int k=0; k<2; ++k) VERIFY(m2(k) == DenseIndex(raw[k]));
|
||||
for(int k=0; k<2; ++k) VERIFY(a2(k) == DenseIndex(raw[k]));
|
||||
for(int k=0; k<2; ++k) VERIFY(m3(k) == int(raw[k]));
|
||||
for(int k=0; k<2; ++k) VERIFY(m4(k) == float(raw[k]));
|
||||
for(int k=0; k<2; ++k) VERIFY((m4(k)) == Scalar(float(raw[k])));
|
||||
}
|
||||
{
|
||||
Matrix<Scalar,1,1> m(raw), m1(raw[0]), m2( (DenseIndex(raw[0])) ), m3( (int(raw[0])) );
|
||||
|
@ -69,8 +69,8 @@ void test_bdcsvd()
|
||||
CALL_SUBTEST_7(( svd_verify_assert<BDCSVD<MatrixXf> >(MatrixXf(10,12)) ));
|
||||
CALL_SUBTEST_8(( svd_verify_assert<BDCSVD<MatrixXcd> >(MatrixXcd(7,5)) ));
|
||||
|
||||
CALL_SUBTEST_1(( svd_all_trivial_2x2(bdcsvd<Matrix2cd>) ));
|
||||
CALL_SUBTEST_1(( svd_all_trivial_2x2(bdcsvd<Matrix2d>) ));
|
||||
CALL_SUBTEST_101(( svd_all_trivial_2x2(bdcsvd<Matrix2cd>) ));
|
||||
CALL_SUBTEST_102(( svd_all_trivial_2x2(bdcsvd<Matrix2d>) ));
|
||||
|
||||
for(int i = 0; i < g_repeat; i++) {
|
||||
CALL_SUBTEST_3(( bdcsvd<Matrix3f>() ));
|
||||
@ -104,8 +104,8 @@ void test_bdcsvd()
|
||||
CALL_SUBTEST_7( BDCSVD<MatrixXf>(10,10) );
|
||||
|
||||
// Check that preallocation avoids subsequent mallocs
|
||||
CALL_SUBTEST_9( svd_preallocate() );
|
||||
CALL_SUBTEST_9( svd_preallocate<void>() );
|
||||
|
||||
CALL_SUBTEST_2( svd_underoverflow() );
|
||||
CALL_SUBTEST_2( svd_underoverflow<void>() );
|
||||
}
|
||||
|
||||
|
@ -114,7 +114,7 @@ void test_jacobisvd()
|
||||
CALL_SUBTEST_7( JacobiSVD<MatrixXf>(10,10) );
|
||||
|
||||
// Check that preallocation avoids subsequent mallocs
|
||||
CALL_SUBTEST_9( svd_preallocate() );
|
||||
CALL_SUBTEST_9( svd_preallocate<void>() );
|
||||
|
||||
CALL_SUBTEST_2( svd_underoverflow() );
|
||||
CALL_SUBTEST_2( svd_underoverflow<void>() );
|
||||
}
|
||||
|
@ -88,6 +88,10 @@ template<typename MatrixType> void real_complex(DenseIndex rows = MatrixType::Ro
|
||||
g_called = false;
|
||||
VERIFY_IS_APPROX(m1*s, m1*Scalar(s));
|
||||
VERIFY(g_called && "matrix<complex> * real not properly optimized");
|
||||
|
||||
g_called = false;
|
||||
VERIFY_IS_APPROX(m1/s, m1/Scalar(s));
|
||||
VERIFY(g_called && "matrix<complex> / real not properly optimized");
|
||||
}
|
||||
|
||||
void test_linearstructure()
|
||||
|
@ -315,9 +315,29 @@ template<typename Scalar> void packetmath_real()
|
||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasExp, std::exp, internal::pexp);
|
||||
{
|
||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
data1[1] = std::numeric_limits<Scalar>::epsilon();
|
||||
packet_helper<internal::packet_traits<Scalar>::HasExp,Packet> h;
|
||||
h.store(data2, internal::pexp(h.load(data1)));
|
||||
h.store(data2, internal::pexp(h.load(data1)));
|
||||
VERIFY(numext::isnan(data2[0]));
|
||||
VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]);
|
||||
|
||||
data1[0] = -std::numeric_limits<Scalar>::epsilon();
|
||||
data1[1] = 0;
|
||||
h.store(data2, internal::pexp(h.load(data1)));
|
||||
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
|
||||
VERIFY_IS_EQUAL(std::exp(0), data2[1]);
|
||||
|
||||
data1[0] = (std::numeric_limits<Scalar>::min)();
|
||||
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
||||
h.store(data2, internal::pexp(h.load(data1)));
|
||||
VERIFY_IS_EQUAL(std::exp((std::numeric_limits<Scalar>::min)()), data2[0]);
|
||||
VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits<Scalar>::min)()), data2[1]);
|
||||
|
||||
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
||||
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
||||
h.store(data2, internal::pexp(h.load(data1)));
|
||||
VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
||||
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::denorm_min()), data2[1]);
|
||||
}
|
||||
|
||||
for (int i=0; i<size; ++i)
|
||||
@ -331,12 +351,33 @@ template<typename Scalar> void packetmath_real()
|
||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
|
||||
{
|
||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
data1[1] = std::numeric_limits<Scalar>::epsilon();
|
||||
packet_helper<internal::packet_traits<Scalar>::HasLog,Packet> h;
|
||||
h.store(data2, internal::plog(h.load(data1)));
|
||||
VERIFY(numext::isnan(data2[0]));
|
||||
VERIFY(std::isnan(data2[0]));
|
||||
// VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]);
|
||||
|
||||
data1[0] = -std::numeric_limits<Scalar>::epsilon();
|
||||
data1[1] = 0;
|
||||
h.store(data2, internal::plog(h.load(data1)));
|
||||
VERIFY(std::isnan(data2[0]));
|
||||
// VERIFY_IS_EQUAL(std::log(0), data2[1]);
|
||||
|
||||
data1[0] = (std::numeric_limits<Scalar>::min)();
|
||||
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
||||
h.store(data2, internal::plog(h.load(data1)));
|
||||
VERIFY_IS_EQUAL(std::log((std::numeric_limits<Scalar>::min)()), data2[0]);
|
||||
// VERIFY(std::isnan(data2[1]));
|
||||
|
||||
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
||||
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
||||
h.store(data2, internal::plog(h.load(data1)));
|
||||
// VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
||||
// VERIFY(std::isnan(data2[1]));
|
||||
|
||||
data1[0] = -1.0f;
|
||||
h.store(data2, internal::plog(h.load(data1)));
|
||||
VERIFY(numext::isnan(data2[0]));
|
||||
VERIFY(std::isnan(data2[0]));
|
||||
#if !EIGEN_FAST_MATH
|
||||
h.store(data2, internal::psqrt(h.load(data1)));
|
||||
VERIFY(numext::isnan(data2[0]));
|
||||
|
@ -23,8 +23,8 @@ template<typename MatrixType> void qr()
|
||||
MatrixType m1;
|
||||
createRandomPIMatrixOfRank(rank,rows,cols,m1);
|
||||
ColPivHouseholderQR<MatrixType> qr(m1);
|
||||
VERIFY(rank == qr.rank());
|
||||
VERIFY(cols - qr.rank() == qr.dimensionOfKernel());
|
||||
VERIFY_IS_EQUAL(rank, qr.rank());
|
||||
VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel());
|
||||
VERIFY(!qr.isInjective());
|
||||
VERIFY(!qr.isInvertible());
|
||||
VERIFY(!qr.isSurjective());
|
||||
@ -51,11 +51,11 @@ template<typename MatrixType, int Cols2> void qr_fixedsize()
|
||||
Matrix<Scalar,Rows,Cols> m1;
|
||||
createRandomPIMatrixOfRank(rank,Rows,Cols,m1);
|
||||
ColPivHouseholderQR<Matrix<Scalar,Rows,Cols> > qr(m1);
|
||||
VERIFY(rank == qr.rank());
|
||||
VERIFY(Cols - qr.rank() == qr.dimensionOfKernel());
|
||||
VERIFY(qr.isInjective() == (rank == Rows));
|
||||
VERIFY(qr.isSurjective() == (rank == Cols));
|
||||
VERIFY(qr.isInvertible() == (qr.isInjective() && qr.isSurjective()));
|
||||
VERIFY_IS_EQUAL(rank, qr.rank());
|
||||
VERIFY_IS_EQUAL(Cols - qr.rank(), qr.dimensionOfKernel());
|
||||
VERIFY_IS_EQUAL(qr.isInjective(), (rank == Rows));
|
||||
VERIFY_IS_EQUAL(qr.isSurjective(), (rank == Cols));
|
||||
VERIFY_IS_EQUAL(qr.isInvertible(), (qr.isInjective() && qr.isSurjective()));
|
||||
|
||||
Matrix<Scalar,Rows,Cols> r = qr.matrixQR().template triangularView<Upper>();
|
||||
Matrix<Scalar,Rows,Cols> c = qr.householderQ() * r * qr.colsPermutation().inverse();
|
||||
|
@ -23,8 +23,8 @@ template<typename MatrixType> void qr()
|
||||
MatrixType m1;
|
||||
createRandomPIMatrixOfRank(rank,rows,cols,m1);
|
||||
FullPivHouseholderQR<MatrixType> qr(m1);
|
||||
VERIFY(rank == qr.rank());
|
||||
VERIFY(cols - qr.rank() == qr.dimensionOfKernel());
|
||||
VERIFY_IS_EQUAL(rank, qr.rank());
|
||||
VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel());
|
||||
VERIFY(!qr.isInjective());
|
||||
VERIFY(!qr.isInvertible());
|
||||
VERIFY(!qr.isSurjective());
|
||||
|
@ -221,6 +221,12 @@ int test_ref_overload_fun1(Ref<MatrixXf> ) { return 3; }
|
||||
int test_ref_overload_fun2(Ref<const MatrixXd> ) { return 4; }
|
||||
int test_ref_overload_fun2(Ref<const MatrixXf> ) { return 5; }
|
||||
|
||||
void test_ref_ambiguous(const Ref<const ArrayXd> &A, Ref<ArrayXd> B)
|
||||
{
|
||||
B = A;
|
||||
B = A - A;
|
||||
}
|
||||
|
||||
// See also bug 969
|
||||
void test_ref_overloads()
|
||||
{
|
||||
@ -233,6 +239,9 @@ void test_ref_overloads()
|
||||
VERIFY( test_ref_overload_fun2(Ad)==4 );
|
||||
VERIFY( test_ref_overload_fun2(Ad+Bd)==4 );
|
||||
VERIFY( test_ref_overload_fun2(Af+Bf)==5 );
|
||||
|
||||
ArrayXd A, B;
|
||||
test_ref_ambiguous(A, B);
|
||||
}
|
||||
|
||||
void test_ref()
|
||||
|
@ -365,6 +365,20 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
|
||||
VERIFY_IS_APPROX(m2, refMat2);
|
||||
}
|
||||
|
||||
// test diagonal to sparse
|
||||
{
|
||||
DenseVector d = DenseVector::Random(rows);
|
||||
DenseMatrix refMat2 = d.asDiagonal();
|
||||
SparseMatrixType m2(rows, rows);
|
||||
m2 = d.asDiagonal();
|
||||
VERIFY_IS_APPROX(m2, refMat2);
|
||||
SparseMatrixType m3(d.asDiagonal());
|
||||
VERIFY_IS_APPROX(m3, refMat2);
|
||||
refMat2 += d.asDiagonal();
|
||||
m2 += d.asDiagonal();
|
||||
VERIFY_IS_APPROX(m2, refMat2);
|
||||
}
|
||||
|
||||
// test conservative resize
|
||||
{
|
||||
std::vector< std::pair<StorageIndex,StorageIndex> > inc;
|
||||
|
@ -272,6 +272,7 @@ template<typename Solver> void check_sparse_spd_solving(Solver& solver, int maxS
|
||||
DenseVector b = it.rhs();
|
||||
DenseVector refX = it.refX();
|
||||
PermutationMatrix<Dynamic, Dynamic, StorageIndex> pnull;
|
||||
halfA.resize(A.rows(), A.cols());
|
||||
if(Solver::UpLo == (Lower|Upper))
|
||||
halfA = A;
|
||||
else
|
||||
|
@ -89,6 +89,11 @@ template<typename Scalar> void test_sparseqr_scalar()
QtQ = Q * Q.adjoint();
idM.resize(Q.rows(), Q.rows()); idM.setIdentity();
VERIFY(idM.isApprox(QtQ));

// Q to dense
DenseMat dQ;
dQ = solver.matrixQ();
VERIFY_IS_APPROX(Q, dQ);
}
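Outside the test harness, materializing the implicit Q factor through the new assignment paths might look like the following sketch; the matrix and function names are illustrative, and the input is copied and compressed because SparseQR expects a compressed matrix:

    #include <Eigen/Sparse>

    void sparse_qr_q_demo(const Eigen::SparseMatrix<double>& A)
    {
      Eigen::SparseMatrix<double> Ac = A;
      Ac.makeCompressed();  // SparseQR requires compressed storage

      Eigen::SparseQR<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > qr(Ac);

      Eigen::MatrixXd Qdense;
      Qdense = qr.matrixQ();            // dense evaluation of the implicit Q

      Eigen::SparseMatrix<double> Qsparse;
      Qsparse = qr.matrixQ();           // sparse evaluation via SparseQR_QProduct
    }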
void test_sparseqr()
|
||||
{
|
||||
|
@ -33,6 +33,7 @@ void svd_check_full(const MatrixType& m, const SvdType& svd)
|
||||
};
|
||||
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename MatrixType::RealScalar RealScalar;
|
||||
typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
|
||||
typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
|
||||
|
||||
@ -40,7 +41,10 @@ void svd_check_full(const MatrixType& m, const SvdType& svd)
|
||||
sigma.diagonal() = svd.singularValues().template cast<Scalar>();
|
||||
MatrixUType u = svd.matrixU();
|
||||
MatrixVType v = svd.matrixV();
|
||||
VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
|
||||
RealScalar scaling = m.cwiseAbs().maxCoeff();
|
||||
if(scaling<=(std::numeric_limits<RealScalar>::min)())
|
||||
scaling = RealScalar(1);
|
||||
VERIFY_IS_APPROX(m/scaling, u * (sigma/scaling) * v.adjoint());
|
||||
VERIFY_IS_UNITARY(u);
|
||||
VERIFY_IS_UNITARY(v);
|
||||
}
|
||||
@ -307,6 +311,7 @@ void svd_inf_nan()
|
||||
|
||||
// Regression test for bug 286: JacobiSVD loops indefinitely with some
|
||||
// matrices containing denormal numbers.
|
||||
template<typename>
|
||||
void svd_underoverflow()
|
||||
{
|
||||
#if defined __INTEL_COMPILER
|
||||
@ -384,6 +389,7 @@ void svd_all_trivial_2x2( void (*cb)(const MatrixType&,bool) )
|
||||
} while((id<int(value_set.size())).all());
|
||||
}
|
||||
|
||||
template<typename>
|
||||
void svd_preallocate()
|
||||
{
|
||||
Vector3f v(3.f, 2.f, 1.f);
|
||||
|
@ -59,8 +59,10 @@
|
||||
#include "Eigen/Core"
|
||||
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
|
||||
@ -80,6 +82,7 @@
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h"
|
||||
@ -88,6 +91,7 @@
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h"
|
||||
|
@ -23,7 +23,7 @@ template <typename T, size_t n> class array {
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
|
||||
|
||||
static const std::size_t size = n;
|
||||
static const std::size_t size() { return n; }
|
||||
|
||||
T values[n];
|
||||
|
||||
|
@ -375,6 +375,28 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
|
||||
resize(dims);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_EMULATE_CXX11_META_H
|
||||
template <typename std::ptrdiff_t... Indices>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(const Sizes<Indices...>& dimensions) {
|
||||
array<Index, NumIndices> dims;
|
||||
for (std::size_t i = 0; i < NumIndices; ++i) {
|
||||
dims[i] = static_cast<Index>(dimensions[i]);
|
||||
}
|
||||
resize(dims);
|
||||
}
|
||||
#else
|
||||
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
|
||||
EIGEN_DEVICE_FUNC
|
||||
void resize(const Sizes<V1, V2, V3, V4, V5>& dimensions) {
|
||||
array<Index, NumIndices> dims;
|
||||
for (std::size_t i = 0; i < NumIndices; ++i) {
|
||||
dims[i] = static_cast<Index>(dimensions[i]);
|
||||
}
|
||||
resize(dims);
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
||||
bool checkIndexRange(const array<Index, NumIndices>& indices) const
|
||||
|
@ -108,6 +108,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return unaryExpr(internal::scalar_inverse_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived>
|
||||
tanh() const {
|
||||
return unaryExpr(internal::scalar_tanh_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived>
|
||||
exp() const {
|
||||
@ -295,11 +301,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::SumReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
|
||||
const TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
|
||||
sum() const {
|
||||
array<Index, NumDimensions> in_dims;
|
||||
for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
|
||||
return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::SumReducer<CoeffReturnType>());
|
||||
DimensionList<Index, NumDimensions> in_dims;
|
||||
return TensorReductionOp<internal::SumReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::SumReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@ -308,11 +313,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MeanReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
|
||||
const TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
|
||||
mean() const {
|
||||
array<Index, NumDimensions> in_dims;
|
||||
for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
|
||||
return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MeanReducer<CoeffReturnType>());
|
||||
DimensionList<Index, NumDimensions> in_dims;
|
||||
return TensorReductionOp<internal::MeanReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MeanReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@ -321,11 +325,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::ProdReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
|
||||
const TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
|
||||
prod() const {
|
||||
array<Index, NumDimensions> in_dims;
|
||||
for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
|
||||
return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::ProdReducer<CoeffReturnType>());
|
||||
DimensionList<Index, NumDimensions> in_dims;
|
||||
return TensorReductionOp<internal::ProdReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::ProdReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@ -334,11 +337,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MaxReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
|
||||
const TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
|
||||
maximum() const {
|
||||
array<Index, NumDimensions> in_dims;
|
||||
for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
|
||||
return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MaxReducer<CoeffReturnType>());
|
||||
DimensionList<Index, NumDimensions> in_dims;
|
||||
return TensorReductionOp<internal::MaxReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MaxReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@ -347,11 +349,10 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const Dims, const Derived>(derived(), dims, internal::MinReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>
|
||||
const TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>
|
||||
minimum() const {
|
||||
array<Index, NumDimensions> in_dims;
|
||||
for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i;
|
||||
return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const array<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>());
|
||||
DimensionList<Index, NumDimensions> in_dims;
|
||||
return TensorReductionOp<internal::MinReducer<CoeffReturnType>, const DimensionList<Index, NumDimensions>, const Derived>(derived(), in_dims, internal::MinReducer<CoeffReturnType>());
|
||||
}
|
||||
|
||||
template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
@ -413,6 +414,26 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
padding_type);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>
|
||||
extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols,
|
||||
const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1,
|
||||
const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = 0) const {
|
||||
return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value);
|
||||
}
|
||||
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>
|
||||
extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols,
|
||||
const Index plane_stride, const Index row_stride, const Index col_stride,
|
||||
const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride,
|
||||
const Index padding_top_z, const Index padding_bottom_z,
|
||||
const Index padding_top, const Index padding_bottom,
|
||||
const Index padding_left, const Index padding_right, const Scalar padding_value = 0) const {
|
||||
return TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Derived>(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value);
|
||||
}
|
||||
|
||||
// Morphing operators.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorLayoutSwapOp<const Derived>
|
||||
@ -460,6 +481,18 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return TensorStridingOp<const Strides, const Derived>(derived(), strides);
|
||||
}
|
||||
|
||||
// Added support for custom unary and binary operations
|
||||
template <typename CustomUnaryFunc>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCustomUnaryOp<const CustomUnaryFunc, const Derived> customOp(const CustomUnaryFunc& op) const {
|
||||
return TensorCustomUnaryOp<const CustomUnaryFunc, const Derived>(derived(), op);
|
||||
}
|
||||
template <typename OtherDerived, typename CustomBinaryFunc>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived> customOp(const OtherDerived& other, const CustomBinaryFunc& op) const {
|
||||
return TensorCustomBinaryOp<const CustomBinaryFunc, const Derived, const OtherDerived>(derived(), other, op);
|
||||
}
|
||||
|
||||
// Force the evaluation of the expression.
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorForcedEvalOp<const Derived> eval() const {
|
||||
|
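The customOp entry points added above wrap the TensorCustomUnaryOp / TensorCustomBinaryOp expressions introduced later in this commit; their evaluators call func().dimensions(...) to size the result and func().eval(...) to fill it. A purely illustrative unary functor satisfying that contract (all names invented for the sketch; the input is assumed to be a plain Tensor so that Input::Dimensions exists):

// Sketch only: a functor usable as t.customOp(ScaleBy2()).
struct ScaleBy2 {
  template <typename Input>
  typename Input::Dimensions dimensions(const Input& input) const {
    return input.dimensions();              // result has the input's shape
  }
  template <typename Input, typename Output, typename Device>
  void eval(const Input& input, Output& output, const Device& device) const {
    output.device(device) = input + input;  // fill the preallocated result
  }
};

// Usage sketch:
//   Eigen::Tensor<float, 2> t(3, 4);
//   t.setRandom();
//   Eigen::Tensor<float, 2> r = t.customOp(ScaleBy2());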
@ -106,8 +106,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
|
||||
{
|
||||
typedef TensorAssignOp<TensorChippingOp, const TensorChippingOp> Assign;
|
||||
Assign assign(*this, other);
|
||||
static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess;
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -117,8 +116,7 @@ class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
|
||||
{
|
||||
typedef TensorAssignOp<TensorChippingOp, const OtherDerived> Assign;
|
||||
Assign assign(*this, other);
|
||||
static const bool Vectorize = TensorEvaluator<const Assign, DefaultDevice>::PacketAccess;
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, Vectorize>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
|
||||
{
|
||||
typedef TensorAssignOp<TensorConcatenationOp, const TensorConcatenationOp> Assign;
|
||||
Assign assign(*this, other);
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -98,7 +98,7 @@ class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsX
|
||||
{
|
||||
typedef TensorAssignOp<TensorConcatenationOp, const OtherDerived> Assign;
|
||||
Assign assign(*this, other);
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -334,7 +334,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
|
||||
eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize());
|
||||
|
||||
EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
|
||||
PacketReturnType rslt = internal::pstore<PacketReturnType>(values, x);
|
||||
internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
|
||||
for (int i = 0; i < packetSize; ++i) {
|
||||
coeffRef(index+i) = values[i];
|
||||
}
|
||||
|
@ -364,14 +364,6 @@ class TensorContractionInputMapper<Scalar, Index, side, Tensor, nocontract_t, co
|
||||
};
|
||||
|
||||
|
||||
template <size_t n> struct max_n_1 {
|
||||
static const size_t size = n;
|
||||
};
|
||||
template <> struct max_n_1<0> {
|
||||
static const size_t size = 1;
|
||||
};
|
||||
|
||||
|
||||
template<typename Dimensions, typename LhsXprType, typename RhsXprType>
|
||||
struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >
|
||||
{
|
||||
@ -459,19 +451,6 @@ class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXp
|
||||
};
|
||||
|
||||
|
||||
template<bool cond> struct Cond {};
|
||||
|
||||
template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
const T1& choose(Cond<true>, const T1& first, const T2&) {
|
||||
return first;
|
||||
}
|
||||
|
||||
template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
const T2& choose(Cond<false>, const T1&, const T2& second) {
|
||||
return second;
|
||||
}
|
||||
|
||||
|
||||
template<typename Derived>
|
||||
struct TensorContractionEvaluatorBase
|
||||
{
|
||||
@ -508,13 +487,13 @@ struct TensorContractionEvaluatorBase
|
||||
static const int RDims =
|
||||
internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;
|
||||
static const unsigned int ContractDims = internal::array_size<Indices>::value;
|
||||
static const int NumDims = internal::max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||
static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||
|
||||
typedef array<Index, LDims> left_dim_mapper_t;
|
||||
typedef array<Index, RDims> right_dim_mapper_t;
|
||||
typedef array<Index, ContractDims> contract_t;
|
||||
typedef array<Index, internal::max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||
typedef array<Index, internal::max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
@ -869,10 +848,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef array<Index, RDims> right_dim_mapper_t;
|
||||
|
||||
typedef array<Index, ContractDims> contract_t;
|
||||
typedef array<Index, internal::max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||
typedef array<Index, internal::max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||
|
||||
static const int NumDims = internal::max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||
static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||
|
||||
// Could we use NumDimensions here?
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
@ -1241,10 +1241,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef array<Index, RDims> right_dim_mapper_t;
|
||||
|
||||
typedef array<Index, ContractDims> contract_t;
|
||||
typedef array<Index, internal::max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||
typedef array<Index, internal::max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||
|
||||
static const int NumDims = internal::max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||
static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
|
@ -93,10 +93,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
|
||||
typedef array<Index, RDims> right_dim_mapper_t;
|
||||
|
||||
typedef array<Index, ContractDims> contract_t;
|
||||
typedef array<Index, internal::max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||
typedef array<Index, internal::max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||
typedef array<Index, max_n_1<LDims - ContractDims>::size> left_nocontract_t;
|
||||
typedef array<Index, max_n_1<RDims - ContractDims>::size> right_nocontract_t;
|
||||
|
||||
static const int NumDims = internal::max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||
static const int NumDims = max_n_1<LDims + RDims - 2 * ContractDims>::size;
|
||||
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
|
||||
|
@ -510,7 +510,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
|
||||
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
||||
EvalTo evalToTmp(local, m_kernelArg);
|
||||
internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<KernelArgType, Device>::PacketAccess>::run(evalToTmp, m_device);
|
||||
const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
|
||||
internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
|
||||
|
||||
m_kernel = local;
|
||||
m_local_kernel = true;
|
||||
@ -815,7 +816,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
|
||||
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
||||
EvalTo evalToTmp(local, m_kernelArg);
|
||||
internal::TensorExecutor<const EvalTo, GpuDevice, TensorEvaluator<KernelArgType, GpuDevice>::PacketAccess>::run(evalToTmp, m_device);
|
||||
const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
|
||||
internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);
|
||||
|
||||
m_kernel = local;
|
||||
m_local_kernel = true;
|
||||
|
310
unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h
Normal file
@ -0,0 +1,310 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \class TensorCustomUnaryOp
|
||||
* \ingroup CXX11_Tensor_Module
|
||||
*
|
||||
* \brief Tensor custom class.
|
||||
*
|
||||
*
|
||||
*/
|
||||
namespace internal {
|
||||
template<typename CustomUnaryFunc, typename XprType>
|
||||
struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
|
||||
{
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
typedef typename XprType::StorageKind StorageKind;
|
||||
typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = traits<XprType>::NumDimensions;
|
||||
static const int Layout = traits<XprType>::Layout;
|
||||
};
|
||||
|
||||
template<typename CustomUnaryFunc, typename XprType>
|
||||
struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
|
||||
{
|
||||
typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type;
|
||||
};
|
||||
|
||||
template<typename CustomUnaryFunc, typename XprType>
|
||||
struct nested<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
|
||||
{
|
||||
typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
|
||||
template<typename CustomUnaryFunc, typename XprType>
|
||||
class TensorCustomUnaryOp : public TensorBase<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
typedef typename internal::traits<TensorCustomUnaryOp>::Scalar Scalar;
|
||||
typedef typename internal::traits<TensorCustomUnaryOp>::Packet Packet;
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
typedef typename internal::nested<TensorCustomUnaryOp>::type Nested;
|
||||
typedef typename internal::traits<TensorCustomUnaryOp>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<TensorCustomUnaryOp>::Index Index;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func)
|
||||
: m_expr(expr), m_func(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const CustomUnaryFunc& func() const { return m_func; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
expression() const { return m_expr; }
|
||||
|
||||
protected:
|
||||
typename XprType::Nested m_expr;
|
||||
const CustomUnaryFunc m_func;
|
||||
};
|
||||
|
||||
|
||||
// Eval as rvalue
|
||||
template<typename CustomUnaryFunc, typename XprType, typename Device>
|
||||
struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Device>
|
||||
{
|
||||
typedef TensorCustomUnaryOp<CustomUnaryFunc, XprType> ArgType;
|
||||
typedef typename internal::traits<ArgType>::Index Index;
|
||||
static const int NumDims = internal::traits<ArgType>::NumDimensions;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef
|
||||
typename internal::remove_const<typename ArgType::Scalar>::type Scalar;
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
|
||||
BlockAccess = false,
|
||||
Layout = TensorEvaluator<XprType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device)
|
||||
: m_op(op), m_device(device), m_result(NULL)
|
||||
{
|
||||
m_dimensions = op.func().dimensions(op.expression());
|
||||
}
|
||||
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
|
||||
if (data) {
|
||||
evalTo(data);
|
||||
return false;
|
||||
} else {
|
||||
m_result = static_cast<CoeffReturnType*>(
|
||||
m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
|
||||
evalTo(m_result);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||
if (m_result != NULL) {
|
||||
m_device.deallocate(m_result);
|
||||
m_result = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||
return m_result[index];
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
|
||||
return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
|
||||
TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(
|
||||
data, m_dimensions);
|
||||
m_op.func().eval(m_op.expression(), result, m_device);
|
||||
}
|
||||
|
||||
Dimensions m_dimensions;
|
||||
const ArgType m_op;
|
||||
const Device& m_device;
|
||||
CoeffReturnType* m_result;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** \class TensorCustomBinaryOp
|
||||
* \ingroup CXX11_Tensor_Module
|
||||
*
|
||||
* \brief Tensor custom class.
|
||||
*
|
||||
*
|
||||
*/
|
||||
namespace internal {
|
||||
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
|
||||
struct traits<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
|
||||
{
|
||||
typedef typename internal::promote_storage_type<typename LhsXprType::Scalar,
|
||||
typename RhsXprType::Scalar>::ret Scalar;
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
|
||||
typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
|
||||
typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType,
|
||||
typename RhsXprType::PacketReturnType>::ret PacketReturnType;
|
||||
typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
|
||||
typename traits<RhsXprType>::StorageKind>::ret StorageKind;
|
||||
typedef typename promote_index_type<typename traits<LhsXprType>::Index,
|
||||
typename traits<RhsXprType>::Index>::type Index;
|
||||
typedef typename LhsXprType::Nested LhsNested;
|
||||
typedef typename RhsXprType::Nested RhsNested;
|
||||
typedef typename remove_reference<LhsNested>::type _LhsNested;
|
||||
typedef typename remove_reference<RhsNested>::type _RhsNested;
|
||||
static const int NumDimensions = traits<LhsXprType>::NumDimensions;
|
||||
static const int Layout = traits<LhsXprType>::Layout;
|
||||
};
|
||||
|
||||
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
|
||||
struct eval<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Eigen::Dense>
|
||||
{
|
||||
typedef const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>& type;
|
||||
};
|
||||
|
||||
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
|
||||
struct nested<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> >
|
||||
{
|
||||
typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
|
||||
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType>
|
||||
class TensorCustomBinaryOp : public TensorBase<TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
typedef typename internal::traits<TensorCustomBinaryOp>::Scalar Scalar;
|
||||
typedef typename internal::traits<TensorCustomBinaryOp>::Packet Packet;
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename internal::traits<TensorCustomBinaryOp>::CoeffReturnType CoeffReturnType;
|
||||
typedef typename internal::traits<TensorCustomBinaryOp>::PacketReturnType PacketReturnType;
|
||||
typedef typename internal::nested<TensorCustomBinaryOp>::type Nested;
|
||||
typedef typename internal::traits<TensorCustomBinaryOp>::StorageKind StorageKind;
|
||||
typedef typename internal::traits<TensorCustomBinaryOp>::Index Index;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func)
|
||||
|
||||
: m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const CustomBinaryFunc& func() const { return m_func; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename LhsXprType::Nested>::type&
|
||||
lhsExpression() const { return m_lhs_xpr; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename RhsXprType::Nested>::type&
|
||||
rhsExpression() const { return m_rhs_xpr; }
|
||||
|
||||
protected:
|
||||
typename LhsXprType::Nested m_lhs_xpr;
|
||||
typename RhsXprType::Nested m_rhs_xpr;
|
||||
const CustomBinaryFunc m_func;
|
||||
};
|
||||
|
||||
|
||||
// Eval as rvalue
|
||||
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType, typename Device>
|
||||
struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType>, Device>
|
||||
{
|
||||
typedef TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, RhsXprType> XprType;
|
||||
typedef typename internal::traits<XprType>::Index Index;
|
||||
static const int NumDims = internal::traits<XprType>::NumDimensions;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
|
||||
BlockAccess = false,
|
||||
Layout = TensorEvaluator<LhsXprType, Device>::Layout,
|
||||
CoordAccess = false, // to be implemented
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
: m_op(op), m_device(device), m_result(NULL)
|
||||
{
|
||||
m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression());
|
||||
}
|
||||
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
|
||||
if (data) {
|
||||
evalTo(data);
|
||||
return false;
|
||||
} else {
|
||||
m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
|
||||
evalTo(m_result);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||
if (m_result != NULL) {
|
||||
m_device.deallocate(m_result);
|
||||
m_result = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||
return m_result[index];
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
|
||||
return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; }
|
||||
|
||||
protected:
|
||||
EIGEN_DEVICE_FUNC void evalTo(Scalar* data) {
|
||||
TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions);
|
||||
m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
|
||||
}
|
||||
|
||||
Dimensions m_dimensions;
|
||||
const XprType m_op;
|
||||
const Device& m_device;
|
||||
CoeffReturnType* m_result;
|
||||
};
|
||||
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H
|
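For the binary case, the evaluator above expects func().dimensions(lhs, rhs) and func().eval(lhs, rhs, result, device). A hedged sketch of a matching functor (illustrative names; both inputs assumed to be plain Tensors of the same shape):

// Sketch only: a functor usable as a.customOp(b, AxpyFunctor(2.0f)).
struct AxpyFunctor {
  float alpha;
  explicit AxpyFunctor(float a) : alpha(a) {}

  template <typename Lhs, typename Rhs>
  typename Lhs::Dimensions dimensions(const Lhs& x, const Rhs& /*y*/) const {
    return x.dimensions();                           // result shaped like x
  }
  template <typename Lhs, typename Rhs, typename Output, typename Device>
  void eval(const Lhs& x, const Rhs& y, Output& out, const Device& device) const {
    out.device(device) = x * x.constant(alpha) + y;  // alpha*x + y, elementwise
  }
};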
@ -28,8 +28,25 @@ struct DefaultDevice {
|
||||
::memset(buffer, c, n);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE size_t numThreads() const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
|
||||
#ifndef __CUDA_ARCH__
|
||||
// Running on the host CPU
|
||||
return 1;
|
||||
#else
|
||||
// Running on a CUDA device
|
||||
return 32;
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
||||
#ifndef __CUDA_ARCH__
|
||||
// Running single threaded on the host CPU
|
||||
// Should return an enum that encodes the ISA supported by the CPU
|
||||
return 1;
|
||||
#else
|
||||
// Running on a CUDA device
|
||||
return __CUDA_ARCH__ / 100;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
@ -38,10 +55,19 @@ struct DefaultDevice {
|
||||
// We should really use a thread pool here but first we need to find a portable thread pool library.
|
||||
#ifdef EIGEN_USE_THREADS
|
||||
|
||||
// This defines an interface that ThreadPoolDevice can take to use
|
||||
// custom thread pools underneath.
|
||||
class ThreadPoolInterface {
|
||||
public:
|
||||
virtual void Schedule(std::function<void()> fn) = 0;
|
||||
|
||||
virtual ~ThreadPoolInterface() {}
|
||||
};
|
||||
|
||||
// The implementation of the ThreadPool type ensures that the Schedule method
|
||||
// runs the functions it is provided in FIFO order when the scheduling is done
|
||||
// by a single thread.
|
||||
class ThreadPool {
|
||||
class ThreadPool : public ThreadPoolInterface {
|
||||
public:
|
||||
// Construct a pool that contains "num_threads" threads.
|
||||
explicit ThreadPool(int num_threads) {
|
||||
@ -182,7 +208,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) {
|
||||
|
||||
// Build a thread pool device on top of an existing pool of threads.
|
||||
struct ThreadPoolDevice {
|
||||
ThreadPoolDevice(ThreadPool* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
|
||||
ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
|
||||
|
||||
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
||||
return internal::aligned_malloc(num_bytes);
|
||||
@ -204,6 +230,11 @@ struct ThreadPoolDevice {
|
||||
return num_threads_;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
||||
// Should return an enum that encodes the ISA supported by the CPU
|
||||
return 1;
|
||||
}
|
||||
|
||||
template <class Function, class... Args>
|
||||
EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const {
|
||||
Notification* n = new Notification();
|
||||
@ -219,7 +250,7 @@ struct ThreadPoolDevice {
|
||||
}
|
||||
|
||||
private:
|
||||
ThreadPool* pool_;
|
||||
ThreadPoolInterface* pool_;
|
||||
size_t num_threads_;
|
||||
};
|
||||
|
||||
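With the change above, ThreadPoolDevice now holds a ThreadPoolInterface*, so any pool implementing Schedule() can drive tensor expressions. A minimal sketch under the assumption that EIGEN_USE_THREADS is defined before including the Tensor header; the inline pool is purely illustrative:

// Sketch only: plugging a custom pool into ThreadPoolDevice.
#define EIGEN_USE_THREADS
#include <functional>
#include <unsupported/Eigen/CXX11/Tensor>

class InlinePool : public Eigen::ThreadPoolInterface {
 public:
  void Schedule(std::function<void()> fn) { fn(); }  // run scheduled work inline
};

void threadpool_device_sketch() {
  InlinePool pool;
  Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/1);
  Eigen::Tensor<float, 1> a(8), b(8), c(8);
  a.setRandom();
  b.setRandom();
  c.device(device) = a + b;                           // evaluated through the pool
}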
@ -260,9 +291,12 @@ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) {
|
||||
assert(status == cudaSuccess);
|
||||
}
|
||||
|
||||
// Cuda stream to use when no stream is specified explicitly.
|
||||
static const cudaStream_t default_stream = cudaStreamDefault;
|
||||
|
||||
struct GpuDevice {
|
||||
// The cudastream is not owned: the caller is responsible for its initialization and eventual destruction.
|
||||
GpuDevice(const cudaStream_t* stream) : stream_(stream) { eigen_assert(stream); }
|
||||
GpuDevice(const cudaStream_t* stream = &default_stream) : stream_(stream) { eigen_assert(stream); }
|
||||
|
||||
EIGEN_STRONG_INLINE const cudaStream_t& stream() const { return *stream_; }
|
||||
|
||||
@ -308,6 +342,8 @@ struct GpuDevice {
|
||||
return 32;
|
||||
}
|
||||
|
||||
inline int majorDeviceVersion() const { return m_deviceProperties.major; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const {
|
||||
cudaStreamSynchronize(*stream_);
|
||||
}
|
||||
|
235
unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h
Normal file
@ -0,0 +1,235 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
|
||||
#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \internal
|
||||
*
|
||||
* \class TensorDimensionList
|
||||
* \ingroup CXX11_Tensor_Module
|
||||
*
|
||||
* \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n.
|
||||
*
|
||||
* \sa Tensor
|
||||
*/
|
||||
|
||||
template <typename Index, std::size_t Rank> struct DimensionList {
|
||||
const Index operator[] (const Index i) const { return i; }
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<typename Index, std::size_t Rank> struct array_size<DimensionList<Index, Rank> > {
|
||||
static const size_t value = Rank;
|
||||
};
|
||||
template<typename Index, std::size_t Rank> struct array_size<const DimensionList<Index, Rank> > {
|
||||
static const size_t value = Rank;
|
||||
};
|
||||
|
||||
template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(DimensionList<Index, Rank>&) {
|
||||
return n;
|
||||
}
|
||||
template<DenseIndex n, typename Index, std::size_t Rank> const Index array_get(const DimensionList<Index, Rank>&) {
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
#if defined(EIGEN_HAS_CONSTEXPR)
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_known_statically<DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex) const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_known_statically<const DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex) const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct all_indices_known_statically<DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() () const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct all_indices_known_statically<const DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() () const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct indices_statically_known_to_increase<DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() () const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct indices_statically_known_to_increase<const DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() () const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_eq<DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||
return i == value;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_eq<const DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||
return i == value;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_ne<DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||
return i != value;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_ne<const DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||
return i != value;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_gt<DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||
return i > value;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_gt<const DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||
return i > value;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_lt<DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||
return i < value;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_lt<const DimensionList<Index, Rank> > {
|
||||
constexpr bool operator() (const DenseIndex i, const DenseIndex value) const {
|
||||
return i < value;
|
||||
}
|
||||
};
|
||||
|
||||
#else
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_known_statically<DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_known_statically<const DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct all_indices_known_statically<DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() () const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct all_indices_known_statically<const DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() () const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct indices_statically_known_to_increase<DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() () const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct indices_statically_known_to_increase<const DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() () const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_eq<DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_eq<const DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_ne<DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_ne<const DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_gt<DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_gt<const DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_lt<DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <typename Index, std::size_t Rank>
|
||||
struct index_statically_lt<const DimensionList<Index, Rank> > {
|
||||
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
|
||||
#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H
|
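DimensionList, defined above, is what the argument-less reductions in TensorBase now pass as the set of dimensions to reduce over: indexing it simply returns the index, so it stands for {0, 1, ..., Rank-1} without storing anything. A small illustrative check:

// Sketch only: DimensionList models the identity index sequence
// {0, 1, ..., Rank-1} without materializing it.
#include <cassert>
#include <unsupported/Eigen/CXX11/Tensor>

void dimension_list_sketch() {
  Eigen::DimensionList<Eigen::DenseIndex, 3> dims;
  assert(dims[0] == 0 && dims[1] == 1 && dims[2] == 2);
  // t.sum(), t.mean(), t.prod(), t.maximum() and t.minimum() with no
  // arguments now build their reduction over such a list.
}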
@ -69,6 +69,31 @@ struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMaj
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Index, std::size_t n>
|
||||
struct fixed_size_tensor_index_extraction_helper
|
||||
{
|
||||
template <typename Dimensions> EIGEN_DEVICE_FUNC
|
||||
static inline Index run(const Index index,
|
||||
const Dimensions& dimensions)
|
||||
{
|
||||
const Index mult = (index == n) ? 1 : 0;
|
||||
return array_get<n>(dimensions) * mult +
|
||||
fixed_size_tensor_index_extraction_helper<Index, n - 1>::run(index, dimensions);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Index>
|
||||
struct fixed_size_tensor_index_extraction_helper<Index, 0>
|
||||
{
|
||||
template <typename Dimensions> EIGEN_DEVICE_FUNC
|
||||
static inline Index run(const Index index,
|
||||
const Dimensions& dimensions)
|
||||
{
|
||||
const Index mult = (index == 0) ? 1 : 0;
|
||||
return array_get<0>(dimensions) * mult;
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@ -99,6 +124,10 @@ struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> {
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const int index) const {
|
||||
return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count - 1>::run(index, *this);
|
||||
}
|
||||
|
||||
template <typename T> Sizes& operator = (const T& /*other*/) {
|
||||
// add assertion failure if the size of other is different
|
||||
return *this;
|
||||
@ -114,10 +143,12 @@ struct Sizes : internal::numeric_list<std::ptrdiff_t, Indices...> {
|
||||
}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template <typename std::ptrdiff_t... Indices>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indices...>&) {
|
||||
return Sizes<Indices...>::total_size;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
@ -166,6 +197,24 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
|
||||
}
|
||||
#endif
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex operator[] (const int index) const {
|
||||
switch (index) {
|
||||
case 0:
|
||||
return internal::get<0, Base>::value;
|
||||
case 1:
|
||||
return internal::get<1, Base>::value;
|
||||
case 2:
|
||||
return internal::get<2, Base>::value;
|
||||
case 3:
|
||||
return internal::get<3, Base>::value;
|
||||
case 4:
|
||||
return internal::get<4, Base>::value;
|
||||
default:
|
||||
eigen_assert(false && "index overflow");
|
||||
return static_cast<std::size_t>(-1);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> Sizes& operator = (const T&) {
|
||||
// to do: check the size of other
|
||||
return *this;
|
||||
@ -181,10 +230,12 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
|
||||
}
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
|
||||
return Sizes<V1, V2, V3, V4, V5>::total_size;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -113,9 +113,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
|
||||
|
||||
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
|
||||
m_impl.evalSubExprsIfNeeded(NULL);
|
||||
return true;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) {
|
||||
eigen_assert(scalar == NULL);
|
||||
return m_impl.evalSubExprsIfNeeded(m_buffer);
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
|
||||
|
@ -22,13 +22,8 @@ namespace Eigen {
|
||||
*/
|
||||
namespace internal {
|
||||
|
||||
template <typename Device, typename Expression>
|
||||
struct IsVectorizable {
|
||||
static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
|
||||
};
|
||||
|
||||
// Default strategy: the expression is evaluated with a single cpu thread.
|
||||
template<typename Expression, typename Device = DefaultDevice, bool Vectorizable = IsVectorizable<Device, Expression>::value>
|
||||
template<typename Expression, typename Device, bool Vectorizable>
|
||||
class TensorExecutor
|
||||
{
|
||||
public:
|
||||
@ -198,10 +193,6 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Expression>
|
||||
struct IsVectorizable<GpuDevice, Expression> {
|
||||
static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess && TensorEvaluator<Expression, GpuDevice>::IsAligned;
|
||||
};
|
||||
|
||||
template<typename Expression>
|
||||
class TensorExecutor<Expression, GpuDevice, false>
|
||||
|
@ -116,7 +116,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
|
||||
}
|
||||
typedef TensorEvalToOp<const ArgType> EvalTo;
|
||||
EvalTo evalToTmp(m_buffer, m_op);
|
||||
internal::TensorExecutor<const EvalTo, Device, TensorEvaluator<ArgType, Device>::PacketAccess>::run(evalToTmp, m_device);
|
||||
const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value;
|
||||
internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
|
||||
m_impl.cleanup();
|
||||
return true;
|
||||
}
|
||||
|
@ -29,6 +29,7 @@ template<typename TargetType, typename XprType> class TensorConversionOp;
|
||||
template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
|
||||
template<typename PatchDim, typename XprType> class TensorPatchOp;
|
||||
template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp;
|
||||
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorVolumePatchOp;
|
||||
template<typename Broadcast, typename XprType> class TensorBroadcastingOp;
|
||||
template<DenseIndex DimId, typename XprType> class TensorChippingOp;
|
||||
template<typename NewDimensions, typename XprType> class TensorReshapingOp;
|
||||
@ -41,14 +42,36 @@ template<typename Strides, typename XprType> class TensorStridingOp;
|
||||
template<typename Generator, typename XprType> class TensorGeneratorOp;
|
||||
template<typename LeftXprType, typename RightXprType> class TensorAssignOp;
|
||||
|
||||
template<typename CustomUnaryFunc, typename XprType> class TensorCustomUnaryOp;
|
||||
template<typename CustomBinaryFunc, typename LhsXprType, typename RhsXprType> class TensorCustomBinaryOp;
|
||||
|
||||
template<typename XprType> class TensorEvalToOp;
|
||||
template<typename XprType> class TensorForcedEvalOp;
|
||||
|
||||
template<typename ExpressionType, typename DeviceType> class TensorDevice;
|
||||
template<typename Derived, typename Device> struct TensorEvaluator;
|
||||
|
||||
class DefaultDevice;
|
||||
class ThreadPoolDevice;
|
||||
class GpuDevice;
|
||||
|
||||
namespace internal {
|
||||
template<typename Expression, typename Device, bool Vectorizable> class TensorExecutor;
|
||||
|
||||
template <typename Device, typename Expression>
|
||||
struct IsVectorizable {
|
||||
static const bool value = TensorEvaluator<Expression, Device>::PacketAccess;
|
||||
};
|
||||
|
||||
template <typename Expression>
|
||||
struct IsVectorizable<GpuDevice, Expression> {
|
||||
static const bool value = TensorEvaluator<Expression, GpuDevice>::PacketAccess &&
|
||||
TensorEvaluator<Expression, GpuDevice>::IsAligned;
|
||||
};
|
||||
|
||||
template <typename Expression, typename Device,
|
||||
bool Vectorizable = IsVectorizable<Device, Expression>::value>
|
||||
class TensorExecutor;
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
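Moving IsVectorizable into the forward declarations lets the default Vectorizable argument of TensorExecutor depend on the device: on GpuDevice both PacketAccess and IsAligned are required. The updated call sites elsewhere in this commit follow the pattern sketched below (the wrapper name is illustrative):

// Sketch only: selecting the executor the way the updated call sites do.
#include <unsupported/Eigen/CXX11/Tensor>

template <typename Expression, typename Device>
void run_with_default_vectorization(const Expression& expr, const Device& device) {
  const bool Vectorize = Eigen::internal::IsVectorizable<Device, Expression>::value;
  Eigen::internal::TensorExecutor<const Expression, Device, Vectorize>::run(expr, device);
}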
@ -17,6 +17,7 @@ namespace internal {
|
||||
template <typename T> struct SumReducer
|
||||
{
|
||||
static const bool PacketAccess = true;
|
||||
static const bool IsStateful = false;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||
(*accum) += t;
|
||||
@ -49,6 +50,8 @@ template <typename T> struct SumReducer
|
||||
template <typename T> struct MeanReducer
|
||||
{
|
||||
static const bool PacketAccess = true;
|
||||
static const bool IsStateful = true;
|
||||
|
||||
MeanReducer() : scalarCount_(0), packetCount_(0) { }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
|
||||
@ -88,6 +91,7 @@ template <typename T> struct MeanReducer
|
||||
template <typename T> struct MaxReducer
|
||||
{
|
||||
static const bool PacketAccess = true;
|
||||
static const bool IsStateful = false;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||
if (t > *accum) { *accum = t; }
|
||||
@ -120,6 +124,7 @@ template <typename T> struct MaxReducer
|
||||
template <typename T> struct MinReducer
|
||||
{
|
||||
static const bool PacketAccess = true;
|
||||
static const bool IsStateful = false;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||
if (t < *accum) { *accum = t; }
|
||||
@ -153,6 +158,7 @@ template <typename T> struct MinReducer
|
||||
template <typename T> struct ProdReducer
|
||||
{
|
||||
static const bool PacketAccess = true;
|
||||
static const bool IsStateful = false;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
|
||||
(*accum) *= t;
|
||||
|
@ -90,7 +90,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
    {
      typedef TensorAssignOp<TensorLayoutSwapOp, const TensorLayoutSwapOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

@ -100,7 +100,7 @@ class TensorLayoutSwapOp : public TensorBase<TensorLayoutSwapOp<XprType>, WriteA
    {
      typedef TensorAssignOp<TensorLayoutSwapOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

36 unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h Normal file
@ -0,0 +1,36 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H
#define EIGEN_CXX11_TENSOR_TENSOR_META_H

namespace Eigen {

template<bool cond> struct Cond {};

template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
const T1& choose(Cond<true>, const T1& first, const T2&) {
  return first;
}

template<typename T1, typename T2> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
const T2& choose(Cond<false>, const T1&, const T2& second) {
  return second;
}

template <size_t n> struct max_n_1 {
  static const size_t size = n;
};
template <> struct max_n_1<0> {
  static const size_t size = 1;
};

} // namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H
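Cond/choose pick between two values at compile time through overload resolution on the boolean template parameter, and max_n_1 clamps a compile-time count to at least 1 (presumably so a fully reduced tensor still has a one-element dimension array). A small standalone sketch of the same idiom with the Eigen macros dropped:

#include <cstddef>
#include <iostream>

template <bool cond> struct Cond {};

// Overload resolution on Cond<true>/Cond<false> selects the value; no runtime branch.
template <typename T1, typename T2>
const T1& choose(Cond<true>, const T1& first, const T2&) { return first; }
template <typename T1, typename T2>
const T2& choose(Cond<false>, const T1&, const T2& second) { return second; }

// Clamp a compile-time count to at least one, mirroring max_n_1.
template <std::size_t n> struct max_n_1 { static const std::size_t size = n; };
template <> struct max_n_1<0> { static const std::size_t size = 1; };

int main() {
  const bool row_major = false;
  // Picks the second argument because the condition is false.
  std::cout << choose(Cond<row_major>(), 1, 2) << "\n";              // prints 2
  std::cout << max_n_1<0>::size << " " << max_n_1<5>::size << "\n";  // prints "1 5"
}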
@ -78,7 +78,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
    {
      typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

@ -88,7 +88,7 @@ class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, Xpr
    {
      typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

@ -262,7 +262,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
    {
      typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

@ -271,7 +271,7 @@ class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, X
    {
      typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

@ -411,7 +411,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
    {
      const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
      EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
      eigen_assert(index+packetSize-1 < dimensions().TotalSize());
      eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

      Index inputIndices[] = {0, 0};
      Index indices[] = {index, index + packetSize - 1};
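The bounds check now goes through internal::array_prod(dimensions()), i.e. the flat coefficient count is the product of the extents, which also works for the Sizes<1> dimension type used by full reductions. A tiny sketch of that flat-size computation and the packet bounds check it guards (plain C++, not Eigen's array_prod):

#include <array>
#include <cassert>
#include <cstddef>

// Product of all extents: the total number of coefficients in the tensor.
template <std::size_t Rank>
std::size_t flat_size(const std::array<std::size_t, Rank>& dims) {
  std::size_t prod = 1;
  for (std::size_t d : dims) prod *= d;
  return prod;
}

int main() {
  std::array<std::size_t, 3> dims = {4, 5, 6};
  const std::size_t packet_size = 4;  // e.g. 4 floats per SSE packet
  const std::size_t index = 116;
  // A packet load touching [index, index + packet_size - 1] must stay in bounds.
  assert(index + packet_size - 1 < flat_size(dims));  // 119 < 120, OK
  return 0;
}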
@ -44,6 +44,38 @@ struct nested<TensorReductionOp<Op, Dims, XprType>, 1, typename eval<TensorReduc
};


template <typename OutputDims> struct DimInitializer {
  template <typename InputDims, typename ReducedDims> EIGEN_DEVICE_FUNC
  static void run(const InputDims& input_dims,
                  const array<bool, internal::array_size<InputDims>::value>& reduced,
                  OutputDims* output_dims, ReducedDims* reduced_dims) {
    const int NumInputDims = internal::array_size<InputDims>::value;
    int outputIndex = 0;
    int reduceIndex = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (reduced[i]) {
        (*reduced_dims)[reduceIndex] = input_dims[i];
        ++reduceIndex;
      } else {
        (*output_dims)[outputIndex] = input_dims[i];
        ++outputIndex;
      }
    }
  }
};

template <> struct DimInitializer<Sizes<1> > {
  template <typename InputDims, typename Index, size_t Rank> EIGEN_DEVICE_FUNC
  static void run(const InputDims& input_dims, const array<bool, Rank>&,
                  Sizes<1>*, array<Index, Rank>* reduced_dims) {
    const int NumInputDims = internal::array_size<InputDims>::value;
    for (int i = 0; i < NumInputDims; ++i) {
      (*reduced_dims)[i] = input_dims[i];
    }
  }
};


template <typename ReducedDims, int NumTensorDims, int Layout>
struct are_inner_most_dims {
  static const bool value = false;
@ -144,7 +176,7 @@ template <int DimIndex, typename Self, typename Op>
struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
    EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
    for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
    for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
      const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
      InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
    }
@ -154,13 +186,325 @@ struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
template <typename Self, typename Op>
struct InnerMostDimPreserver<0, Self, Op, true> {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
    for (int j = 0; j < self.m_reducedDims[0]; ++j) {
    for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) {
      const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0];
      reducer.reducePacket(self.m_impl.template packet<Unaligned>(input), accum);
    }
  }
};

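DimInitializer walks the input dimensions once and splits them into the dimensions that survive the reduction and the dimensions being reduced; the Sizes<1> specialization covers a full reduction, where every input dimension is reduced and the output is a single coefficient. A standalone sketch of that partitioning, with hypothetical names and std containers instead of Eigen's fixed-size arrays:

#include <array>
#include <iostream>
#include <vector>

// Split input extents into "kept" (output) and "reduced" extents,
// mirroring what DimInitializer::run does with its two output arrays.
template <std::size_t Rank>
void split_dims(const std::array<int, Rank>& input_dims,
                const std::array<bool, Rank>& reduced,
                std::vector<int>* output_dims, std::vector<int>* reduced_dims) {
  for (std::size_t i = 0; i < Rank; ++i) {
    if (reduced[i]) reduced_dims->push_back(input_dims[i]);
    else            output_dims->push_back(input_dims[i]);
  }
}

int main() {
  std::array<int, 3> dims = {2, 3, 4};
  std::array<bool, 3> reduced = {false, true, false};  // reduce the middle dimension
  std::vector<int> out, red;
  split_dims(dims, reduced, &out, &red);
  // Output tensor is 2x4, and 3 values are folded into each output coefficient.
  std::cout << out[0] << "x" << out[1] << ", reduced extent " << red[0] << "\n";
}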
// Default full reducer
template <typename Self, typename Op, typename Device, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
struct FullReducer {
  static const bool HasOptimizedImplementation = false;

  static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) {
    const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions());
    *output = InnerMostDimReducer<Self, Op>::reduce(self, 0, num_coeffs, reducer);
  }
};


#ifdef EIGEN_USE_THREADS
// Multithreaded full reducers
template <typename Eval, typename Op, bool Vectorizable = (Eval::InputPacketAccess & Op::PacketAccess)>
struct FullReducerShard {
  static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) {
    shard->saccum = reducer.initialize();
    for (typename Eval::Index j = 0; j < numValuesToReduce; ++j) {
      reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum);
    }
  }

  typename Eval::CoeffReturnType saccum;
};

template <typename Eval, typename Op>
struct FullReducerShard<Eval, Op, true> {
  static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) {
    const int packetSize = internal::unpacket_traits<typename Eval::PacketReturnType>::size;
    const typename Eval::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;

    shard->paccum = reducer.template initializePacket<typename Eval::PacketReturnType>();
    for (typename Eval::Index j = 0; j < VectorizedSize; j += packetSize) {
      reducer.reducePacket(eval.m_impl.template packet<Unaligned>(firstIndex + j), &shard->paccum);
    }
    shard->saccum = reducer.initialize();
    for (typename Eval::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
      reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum);
    }
  }

  typename Eval::PacketReturnType paccum;
  typename Eval::CoeffReturnType saccum;
};


template <typename Self, typename Op>
struct FullReducer<Self, Op, ThreadPoolDevice, false> {
  static const bool HasOptimizedImplementation = !Op::IsStateful;

  // launch one reducer per thread and accumulate the result.
  static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) {
    typedef typename Self::Index Index;
    const Index num_coeffs = array_prod(self.m_impl.dimensions());
    const Index blocksize = std::floor<Index>(static_cast<float>(num_coeffs)/device.numThreads());
    const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
    eigen_assert(num_coeffs >= numblocks * blocksize);

    std::vector<Notification*> results;
    results.reserve(numblocks);
    std::vector<FullReducerShard<Self, Op, false> > shards;
    shards.resize(numblocks);
    for (Index i = 0; i < numblocks; ++i) {
      results.push_back(device.enqueue(&FullReducerShard<Self, Op, false>::run, self, i*blocksize, blocksize, reducer, &shards[i]));
    }

    FullReducerShard<Self, Op, false> finalShard;
    if (numblocks * blocksize < num_coeffs) {
      FullReducerShard<Self, Op, false>::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard);
    } else {
      finalShard.saccum = reducer.initialize();
    }

    for (Index i = 0; i < numblocks; ++i) {
      wait_until_ready(results[i]);
      delete results[i];
    }

    for (Index i = 0; i < numblocks; ++i) {
      reducer.reduce(shards[i].saccum, &finalShard.saccum);
    }
    *output = reducer.finalize(finalShard.saccum);
  }
};

template <typename Self, typename Op>
struct FullReducer<Self, Op, ThreadPoolDevice, true> {
  static const bool HasOptimizedImplementation = !Op::IsStateful;

  // launch one reducer per thread and accumulate the result.
  static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) {
    typedef typename Self::Index Index;
    const Index num_coeffs = array_prod(self.m_impl.dimensions());
    const Index blocksize = std::floor<Index>(static_cast<float>(num_coeffs)/device.numThreads());
    const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
    eigen_assert(num_coeffs >= numblocks * blocksize);

    std::vector<Notification*> results;
    results.reserve(numblocks);
    std::vector<FullReducerShard<Self, Op, true> > shards;
    shards.resize(numblocks);
    for (Index i = 0; i < numblocks; ++i) {
      results.push_back(device.enqueue(&FullReducerShard<Self, Op, true>::run, self, i*blocksize, blocksize, reducer, &shards[i]));
    }

    FullReducerShard<Self, Op, true> finalShard;
    if (numblocks * blocksize < num_coeffs) {
      FullReducerShard<Self, Op, true>::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard);
    } else {
      finalShard.paccum = reducer.template initializePacket<typename Self::PacketReturnType>();
      finalShard.saccum = reducer.initialize();
    }

    for (Index i = 0; i < numblocks; ++i) {
      wait_until_ready(results[i]);
      delete results[i];
    }

    for (Index i = 0; i < numblocks; ++i) {
      reducer.reducePacket(shards[i].paccum, &finalShard.paccum);
      reducer.reduce(shards[i].saccum, &finalShard.saccum);
    }

    *output = reducer.finalizeBoth(finalShard.saccum, finalShard.paccum);
  }
};
#endif

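The thread-pool FullReducer splits the flat coefficient range into one block per thread, runs a FullReducerShard on each block, and then folds the per-shard accumulators plus the leftover tail block into a single result. A standalone sketch of that shard-and-combine pattern using std::thread instead of Eigen's ThreadPoolDevice/Notification machinery (all names below are illustrative):

#include <iostream>
#include <numeric>
#include <thread>
#include <vector>

// One shard: reduce a contiguous block [first, first + count) into a local sum.
static void reduce_shard(const std::vector<float>& data, std::size_t first,
                         std::size_t count, float* out) {
  float acc = 0.0f;  // SumReducer-style initialize()
  for (std::size_t j = 0; j < count; ++j) acc += data[first + j];
  *out = acc;
}

int main() {
  std::vector<float> data(1000);
  std::iota(data.begin(), data.end(), 0.0f);

  const std::size_t num_threads = 4;
  const std::size_t blocksize = data.size() / num_threads;
  std::vector<float> shard_sums(num_threads, 0.0f);
  std::vector<std::thread> workers;

  for (std::size_t i = 0; i < num_threads; ++i)
    workers.emplace_back(reduce_shard, std::cref(data), i * blocksize, blocksize,
                         &shard_sums[i]);

  // Reduce the tail that did not fit into a full block (empty in this example).
  float total = 0.0f;
  for (std::size_t j = num_threads * blocksize; j < data.size(); ++j) total += data[j];

  for (auto& w : workers) w.join();       // analogue of wait_until_ready()
  for (float s : shard_sums) total += s;  // combine per-shard accumulators
  std::cout << total << "\n";             // 0 + 1 + ... + 999 = 499500
}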
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
// Full reducers for GPU, don't vectorize for now

// Reducer function that enables multiple cuda thread to safely accumulate at the same
// output address. It basically reads the current value of the output variable, and
// attempts to update it with the new value. If in the meantime another cuda thread
// updated the content of the output address it will try again.
template <typename T, typename R>
__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) {
#if __CUDA_ARCH__ >= 300
  if (sizeof(T) == 4)
  {
    unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
    unsigned int newval = oldval;
    reducer.reduce(accum, reinterpret_cast<T*>(&newval));
    if (newval == oldval) {
      return;
    }
    unsigned int readback;
    while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) {
      oldval = readback;
      newval = oldval;
      reducer.reduce(accum, reinterpret_cast<T*>(&newval));
      if (newval == oldval) {
        return;
      }
    }
  }
  else if (sizeof(T) == 8) {
    unsigned long long oldval = *reinterpret_cast<unsigned long long*>(output);
    unsigned long long newval = oldval;
    reducer.reduce(accum, reinterpret_cast<T*>(&newval));
    if (newval == oldval) {
      return;
    }
    unsigned long long readback;
    while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) {
      oldval = readback;
      newval = oldval;
      reducer.reduce(accum, reinterpret_cast<T*>(&newval));
      if (newval == oldval) {
        return;
      }
    }
  }
  else {
    assert(0 && "Wordsize not supported");
  }
#else
  assert(0 && "Shouldn't be called on unsupported device");
#endif
}

template <typename T>
__device__ inline void atomicReduce(T* output, T accum, SumReducer<T>&) {
#if __CUDA_ARCH__ >= 300
  atomicAdd(output, accum);
#else
  assert(0 && "Shouldn't be called on unsupported device");
#endif
}

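atomicReduce is a generic read-modify-write loop: read the current output, apply the reducer to a local copy, and publish it with a compare-and-swap, retrying if another thread got there first (with a plain atomicAdd shortcut when the reducer is a SumReducer). The same lock-free pattern on the CPU, sketched with std::atomic::compare_exchange_weak and a hypothetical MaxReducerSketch:

#include <atomic>
#include <iostream>
#include <thread>
#include <vector>

struct MaxReducerSketch {
  void reduce(float t, float* accum) const { if (t > *accum) *accum = t; }
};

// CAS loop: reload, re-apply the reducer, retry until the publish succeeds.
void atomic_reduce(std::atomic<float>* output, float accum, const MaxReducerSketch& r) {
  float oldval = output->load();
  float newval;
  do {
    newval = oldval;
    r.reduce(accum, &newval);
    if (newval == oldval) return;  // nothing to publish
  } while (!output->compare_exchange_weak(oldval, newval));
}

int main() {
  std::atomic<float> result(-1e30f);
  std::vector<std::thread> threads;
  for (int i = 0; i < 8; ++i)
    threads.emplace_back([&result, i] {
      atomic_reduce(&result, static_cast<float>(i * 7 % 5), MaxReducerSketch());
    });
  for (auto& t : threads) t.join();
  std::cout << result.load() << "\n";  // maximum of the submitted values
}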
template <int BlockSize, int NumPerThread, typename Self,
|
||||
typename Reducer, typename Index>
|
||||
__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
|
||||
typename Self::CoeffReturnType* output) {
|
||||
const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
|
||||
|
||||
if (first_index == 0) {
|
||||
*output = reducer.initialize();
|
||||
}
|
||||
|
||||
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||
for (Index i = 0; i < NumPerThread; ++i) {
|
||||
const Index index = first_index + i * BlockSize;
|
||||
if (index >= num_coeffs) {
|
||||
break;
|
||||
}
|
||||
typename Self::CoeffReturnType val = input.m_impl.coeff(index);
|
||||
reducer.reduce(val, &accum);
|
||||
}
|
||||
|
||||
for (int offset = warpSize/2; offset > 0; offset /= 2) {
|
||||
reducer.reduce(__shfl_down(accum, offset), &accum);
|
||||
}
|
||||
|
||||
if ((threadIdx.x & (warpSize - 1)) == 0) {
|
||||
atomicReduce(output, accum, reducer);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename Self, typename Op, bool Vectorizable>
|
||||
struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
|
||||
// Unfortunately nvidia doesn't support well exotic types such as complex,
|
||||
// so reduce the scope of the optimized version of the code to the simple case
|
||||
// of floats.
|
||||
static const bool HasOptimizedImplementation = !Op::IsStateful &&
|
||||
internal::is_same<typename Self::CoeffReturnType, float>::value;
|
||||
|
||||
template <typename OutputType>
|
||||
static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
|
||||
assert(false && "Should only be called on floats");
|
||||
}
|
||||
|
||||
static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) {
|
||||
typedef typename Self::Index Index;
|
||||
|
||||
const Index num_coeffs = array_prod(self.m_impl.dimensions());
|
||||
const int block_size = 256;
|
||||
const int num_per_thread = 128;
|
||||
const int num_blocks = std::ceil(static_cast<float>(num_coeffs) / (block_size * num_per_thread));
|
||||
LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread>),
|
||||
num_blocks, block_size, 0, device, reducer, self, num_coeffs, output);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
template <typename Self, typename Op,
|
||||
bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
|
||||
class BlockReducer {
|
||||
public:
|
||||
typedef typename Self::Index Index;
|
||||
typedef typename Self::Scalar Scalar;
|
||||
typedef typename Self::CoeffReturnType CoeffReturnType;
|
||||
explicit BlockReducer(const Op& reducer) : op_(reducer) {
|
||||
accum_ = op_.initialize();
|
||||
}
|
||||
void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
|
||||
for (Index i = 0; i < num_values_to_reduce; ++i) {
|
||||
op_.reduce(data[index + i], &accum_);
|
||||
}
|
||||
}
|
||||
CoeffReturnType Finalize() {
|
||||
return op_.finalize(accum_);
|
||||
}
|
||||
|
||||
private:
|
||||
CoeffReturnType accum_;
|
||||
Op op_;
|
||||
};
|
||||
|
||||
|
||||
template <typename Self, typename Op>
|
||||
class BlockReducer<Self, Op, true> {
|
||||
public:
|
||||
typedef typename Self::Index Index;
|
||||
typedef typename Self::Scalar Scalar;
|
||||
typedef typename Self::CoeffReturnType CoeffReturnType;
|
||||
typedef typename Self::PacketReturnType PacketReturnType;
|
||||
explicit BlockReducer(const Op& reducer) : op_(reducer) {
|
||||
vaccum_ = op_.template initializePacket<PacketReturnType>();
|
||||
accum_ = op_.initialize();
|
||||
}
|
||||
void Reduce(Index index, Index num_values_to_reduce, Scalar* data) {
|
||||
const int packet_size = internal::unpacket_traits<PacketReturnType>::size;
|
||||
const typename Self::Index vectorized_size = (num_values_to_reduce /
|
||||
packet_size) * packet_size;
|
||||
for (typename Self::Index i = 0; i < vectorized_size; i += packet_size) {
|
||||
op_.reducePacket(internal::ploadt<PacketReturnType, Unaligned>(
|
||||
&data[index + i]), &vaccum_);
|
||||
}
|
||||
|
||||
for (typename Self::Index i = vectorized_size;
|
||||
i < num_values_to_reduce; ++i) {
|
||||
op_.reduce(data[index + i], &accum_);
|
||||
}
|
||||
}
|
||||
typename Self::CoeffReturnType Finalize() {
|
||||
return op_.finalizeBoth(accum_, vaccum_);
|
||||
}
|
||||
|
||||
private:
|
||||
typename Self::PacketReturnType vaccum_;
|
||||
typename Self::CoeffReturnType accum_;
|
||||
Op op_;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
@ -179,6 +523,7 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims)
|
||||
{ }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer)
|
||||
{ }
|
||||
|
||||
@ -186,6 +531,7 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>
|
||||
const XprType& expression() const { return m_expr; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const Dims& dims() const { return m_dims; }
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const Op& reducer() const { return m_reducer; }
|
||||
|
||||
protected:
|
||||
@ -201,10 +547,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
{
|
||||
typedef TensorReductionOp<Op, Dims, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
|
||||
static const int NumInputDims = internal::array_size<InputDimensions>::value;
|
||||
static const int NumReducedDims = internal::array_size<Dims>::value;
|
||||
static const int NumOutputDims = (NumInputDims==NumReducedDims) ? 1 : NumInputDims - NumReducedDims;
|
||||
typedef DSizes<Index, NumOutputDims> Dimensions;
|
||||
typedef typename internal::conditional<NumInputDims==NumReducedDims, Sizes<1>, DSizes<Index, NumOutputDims> >::type Dimensions;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> Self;
|
||||
static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
|
||||
@ -218,9 +565,10 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
|
||||
static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
|
||||
static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
|
||||
static const bool RunningFullReduction = (NumInputDims==NumReducedDims);
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
: m_impl(op.expression(), device), m_reducer(op.reducer())
|
||||
: m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
|
||||
@ -238,17 +586,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
}
|
||||
|
||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||
int outputIndex = 0;
|
||||
int reduceIndex = 0;
|
||||
for (int i = 0; i < NumInputDims; ++i) {
|
||||
if (reduced[i]) {
|
||||
m_reducedDims[reduceIndex] = input_dims[i];
|
||||
++reduceIndex;
|
||||
} else {
|
||||
m_dimensions[outputIndex] = input_dims[i];
|
||||
++outputIndex;
|
||||
}
|
||||
}
|
||||
internal::DimInitializer<Dimensions>::run(input_dims, reduced, &m_dimensions, &m_reducedDims);
|
||||
|
||||
// Precompute output strides.
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
@ -277,8 +615,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
}
|
||||
}
|
||||
|
||||
outputIndex = 0;
|
||||
reduceIndex = 0;
|
||||
int outputIndex = 0;
|
||||
int reduceIndex = 0;
|
||||
for (int i = 0; i < NumInputDims; ++i) {
|
||||
if (reduced[i]) {
|
||||
m_reducedStrides[reduceIndex] = input_strides[i];
|
||||
@ -291,27 +629,50 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
|
||||
// Special case for full reductions
|
||||
if (NumInputDims == NumReducedDims) {
|
||||
m_dimensions[0] = 1;
|
||||
eigen_assert(m_dimensions[0] == 1);
|
||||
m_preservedStrides[0] = internal::array_prod(input_dims);
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;
|
||||
|
||||
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
|
||||
m_impl.evalSubExprsIfNeeded(NULL);
|
||||
|
||||
// Use the FullReducer if possible.
|
||||
if (RunningFullReduction && internal::FullReducer<Self, Op, Device>::HasOptimizedImplementation &&
|
||||
((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) ||
|
||||
(internal::array_prod(m_impl.dimensions()) > 1024 * 1024))) {
|
||||
|
||||
bool need_assign = false;
|
||||
if (!data) {
|
||||
m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType)));
|
||||
data = m_result;
|
||||
need_assign = true;
|
||||
}
|
||||
|
||||
Op reducer(m_reducer);
|
||||
internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data);
|
||||
return need_assign;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||
m_impl.cleanup();
|
||||
if (m_result) {
|
||||
m_device.deallocate(m_result);
|
||||
}
|
||||
}
|
||||
|
||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||
typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
if (RunningFullReduction && m_result) {
|
||||
return *m_result;
|
||||
}
|
||||
Op reducer(m_reducer);
|
||||
if (ReducingInnerMostDims) {
|
||||
const Index num_values_to_reduce =
|
||||
@ -372,6 +733,13 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
template <int, typename, typename> friend struct internal::GenericDimReducer;
|
||||
template <typename, typename, bool> friend struct internal::InnerMostDimReducer;
|
||||
template <int, typename, typename, bool> friend struct internal::InnerMostDimPreserver;
|
||||
template <typename S, typename O, typename D, bool V> friend struct internal::FullReducer;
|
||||
#ifdef EIGEN_USE_THREADS
|
||||
template <typename S, typename O, bool V> friend struct internal::FullReducerShard;
|
||||
#endif
|
||||
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
|
||||
template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
|
||||
#endif
|
||||
|
||||
// Returns the Index in the input tensor of the first value that needs to be
|
||||
// used to compute the reduction at output index "index".
|
||||
@ -392,7 +760,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
startInput += idx * m_preservedStrides[i];
|
||||
index -= idx * m_outputStrides[i];
|
||||
}
|
||||
startInput += index * m_preservedStrides[0];
|
||||
if (PreservingInnerMostDims) {
|
||||
eigen_assert(m_preservedStrides[0] == 1);
|
||||
startInput += index;
|
||||
} else {
|
||||
startInput += index * m_preservedStrides[0];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < NumOutputDims - 1; ++i) {
|
||||
// This is index_i in the output tensor.
|
||||
@ -400,7 +773,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
startInput += idx * m_preservedStrides[i];
|
||||
index -= idx * m_outputStrides[i];
|
||||
}
|
||||
startInput += index * m_preservedStrides[NumOutputDims - 1];
|
||||
if (PreservingInnerMostDims) {
|
||||
eigen_assert(m_preservedStrides[NumOutputDims - 1] == 1);
|
||||
startInput += index;
|
||||
} else {
|
||||
startInput += index * m_preservedStrides[NumOutputDims - 1];
|
||||
}
|
||||
}
|
||||
return startInput;
|
||||
}
|
||||
@ -425,6 +803,16 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
|
||||
|
||||
// Operation to apply for computing the reduction.
|
||||
Op m_reducer;
|
||||
|
||||
// For full reductions
|
||||
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
|
||||
static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
|
||||
#else
|
||||
static const bool RunningOnGPU = false;
|
||||
#endif
|
||||
CoeffReturnType* m_result;
|
||||
|
||||
const Device& m_device;
|
||||
};
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -80,7 +80,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
|
||||
{
|
||||
typedef TensorAssignOp<TensorReverseOp, const TensorReverseOp> Assign;
|
||||
Assign assign(*this, other);
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -90,7 +90,7 @@ class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
|
||||
{
|
||||
typedef TensorAssignOp<TensorReverseOp, const OtherDerived> Assign;
|
||||
Assign assign(*this, other);
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -78,7 +78,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
|
||||
{
|
||||
typedef TensorAssignOp<TensorShufflingOp, const TensorShufflingOp> Assign;
|
||||
Assign assign(*this, other);
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -88,7 +88,7 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
|
||||
{
|
||||
typedef TensorAssignOp<TensorShufflingOp, const OtherDerived> Assign;
|
||||
Assign assign(*this, other);
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -78,7 +78,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
|
||||
{
|
||||
typedef TensorAssignOp<TensorStridingOp, const TensorStridingOp> Assign;
|
||||
Assign assign(*this, other);
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -88,7 +88,7 @@ class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
|
||||
{
|
||||
typedef TensorAssignOp<TensorStridingOp, const OtherDerived> Assign;
|
||||
Assign assign(*this, other);
|
||||
internal::TensorExecutor<const Assign, DefaultDevice, false>::run(assign, DefaultDevice());
|
||||
internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -156,9 +156,9 @@ struct eval<const TensorRef<PlainObjectType>, Eigen::Dense>
};

// TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector.
template<typename T, int n=1, typename PlainObject = void> struct nested
{
  typedef typename ref_selector<T>::type type;
template<typename T, int n=1, typename PlainObject = void> struct nested
{
  typedef typename ref_selector<T>::type type;
};

template <typename Scalar_, std::size_t NumIndices_, int Options_, typename IndexType_>
677 unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h Normal file
@ -0,0 +1,677 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H

namespace Eigen {

/** \class TensorVolumePatch
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Patch extraction specialized for processing of volumetric data.
  * This assumes that the input has at least 4 dimensions ordered as follows:
  *  - channels
  *  - planes
  *  - rows
  *  - columns
  *  - (optional) additional dimensions such as time or batch size.
  * Calling the volume patch code with patch_planes, patch_rows, and patch_cols
  * is equivalent to calling the regular patch extraction code with parameters
  * d, patch_planes, patch_rows, patch_cols, and 1 for all the additional
  * dimensions.
  */
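The evaluator further down computes the number of patch positions per spatial axis from the (inflation-adjusted) input extent, the effective patch extent, and the stride: VALID padding keeps only positions where the patch lies fully inside the input, while SAME padding keeps ceil(input/stride) positions and splits the required padding between both sides. A small arithmetic sketch of that per-axis output-size computation, mirroring the formulas used in the evaluator below (plain C++, not the Eigen API):

#include <cmath>
#include <iostream>

struct Extent1D { long out; long pad_before; };

// Number of patch positions along one axis, plus the leading padding,
// following the PADDING_VALID / PADDING_SAME conventions used by the evaluator.
Extent1D patch_extent(long input, long patch, long stride, bool same_padding) {
  Extent1D e;
  if (!same_padding) {  // PADDING_VALID: patch must fit entirely inside the input
    e.out = static_cast<long>(std::ceil((input - patch + 1.0) / stride));
    e.pad_before = 0;
  } else {              // PADDING_SAME: one position per stride step
    e.out = static_cast<long>(std::ceil(input / static_cast<double>(stride)));
    const long total_pad = e.out * stride + patch - 1 - input;
    e.pad_before = total_pad - total_pad / 2;  // front side gets the larger half
  }
  return e;
}

int main() {
  // A 7-wide axis, 3-wide patches, stride 2.
  Extent1D valid = patch_extent(7, 3, 2, false);
  Extent1D same  = patch_extent(7, 3, 2, true);
  std::cout << valid.out << " positions (VALID), " << same.out
            << " positions with " << same.pad_before << " leading pad (SAME)\n";
}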
namespace internal {
|
||||
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||
struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > : public traits<XprType>
|
||||
{
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
typedef traits<XprType> XprTraits;
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
typedef typename XprTraits::StorageKind StorageKind;
|
||||
typedef typename XprTraits::Index Index;
|
||||
typedef typename XprType::Nested Nested;
|
||||
typedef typename remove_reference<Nested>::type _Nested;
|
||||
static const int NumDimensions = XprTraits::NumDimensions + 1;
|
||||
static const int Layout = XprTraits::Layout;
|
||||
};
|
||||
|
||||
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||
struct eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, Eigen::Dense>
|
||||
{
|
||||
typedef const TensorVolumePatchOp<Planes, Rows, Cols, XprType>& type;
|
||||
};
|
||||
|
||||
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||
struct nested<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, 1, typename eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType> >::type>
|
||||
{
|
||||
typedef TensorVolumePatchOp<Planes, Rows, Cols, XprType> type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename XprType>
|
||||
class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Scalar Scalar;
|
||||
typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Packet Packet;
|
||||
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
typedef typename Eigen::internal::nested<TensorVolumePatchOp>::type Nested;
|
||||
typedef typename Eigen::internal::traits<TensorVolumePatchOp>::StorageKind StorageKind;
|
||||
typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Index Index;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
|
||||
DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
|
||||
DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
|
||||
DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
|
||||
PaddingType padding_type, Scalar padding_value)
|
||||
: m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
|
||||
m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
|
||||
m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
|
||||
m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
|
||||
m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
|
||||
m_padding_type(padding_type), m_padding_value(padding_value) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
|
||||
DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
|
||||
DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
|
||||
DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
|
||||
DenseIndex padding_top_z, DenseIndex padding_bottom_z,
|
||||
DenseIndex padding_top, DenseIndex padding_bottom,
|
||||
DenseIndex padding_left, DenseIndex padding_right,
|
||||
Scalar padding_value)
|
||||
: m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
|
||||
m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
|
||||
m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
|
||||
m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
|
||||
m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
|
||||
m_padding_left(padding_left), m_padding_right(padding_right),
|
||||
m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex patch_planes() const { return m_patch_planes; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex patch_rows() const { return m_patch_rows; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex patch_cols() const { return m_patch_cols; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex plane_strides() const { return m_plane_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex row_strides() const { return m_row_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex col_strides() const { return m_col_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex in_plane_strides() const { return m_in_plane_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex in_row_strides() const { return m_in_row_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex in_col_strides() const { return m_in_col_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex row_inflate_strides() const { return m_row_inflate_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex col_inflate_strides() const { return m_col_inflate_strides; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
bool padding_explicit() const { return m_padding_explicit; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex padding_top_z() const { return m_padding_top_z; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex padding_bottom_z() const { return m_padding_bottom_z; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex padding_top() const { return m_padding_top; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex padding_bottom() const { return m_padding_bottom; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex padding_left() const { return m_padding_left; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
DenseIndex padding_right() const { return m_padding_right; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
PaddingType padding_type() const { return m_padding_type; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
Scalar padding_value() const { return m_padding_value; }
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
const typename internal::remove_all<typename XprType::Nested>::type&
|
||||
expression() const { return m_xpr; }
|
||||
|
||||
protected:
|
||||
typename XprType::Nested m_xpr;
|
||||
const DenseIndex m_patch_planes;
|
||||
const DenseIndex m_patch_rows;
|
||||
const DenseIndex m_patch_cols;
|
||||
const DenseIndex m_plane_strides;
|
||||
const DenseIndex m_row_strides;
|
||||
const DenseIndex m_col_strides;
|
||||
const DenseIndex m_in_plane_strides;
|
||||
const DenseIndex m_in_row_strides;
|
||||
const DenseIndex m_in_col_strides;
|
||||
const DenseIndex m_plane_inflate_strides;
|
||||
const DenseIndex m_row_inflate_strides;
|
||||
const DenseIndex m_col_inflate_strides;
|
||||
const bool m_padding_explicit;
|
||||
const DenseIndex m_padding_top_z;
|
||||
const DenseIndex m_padding_bottom_z;
|
||||
const DenseIndex m_padding_top;
|
||||
const DenseIndex m_padding_bottom;
|
||||
const DenseIndex m_padding_left;
|
||||
const DenseIndex m_padding_right;
|
||||
const PaddingType m_padding_type;
|
||||
const Scalar m_padding_value;
|
||||
};
|
||||
|
||||
|
||||
// Eval as rvalue
|
||||
template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device>
|
||||
struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, Device>
|
||||
{
|
||||
typedef TensorVolumePatchOp<Planes, Rows, Cols, ArgType> XprType;
|
||||
typedef typename XprType::Index Index;
|
||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||
static const int NumDims = NumInputDims + 1;
|
||||
typedef DSizes<Index, NumDims> Dimensions;
|
||||
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
BlockAccess = false,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
CoordAccess = NumDims == 6,
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
: m_impl(op.expression(), device)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(NumDims >= 5, YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
|
||||
m_paddingValue = op.padding_value();
|
||||
|
||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||
|
||||
// Cache a few variables.
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
m_inputDepth = input_dims[0];
|
||||
m_inputPlanes = input_dims[1];
|
||||
m_inputRows = input_dims[2];
|
||||
m_inputCols = input_dims[3];
|
||||
} else {
|
||||
m_inputDepth = input_dims[NumInputDims-1];
|
||||
m_inputPlanes = input_dims[NumInputDims-2];
|
||||
m_inputRows = input_dims[NumInputDims-3];
|
||||
m_inputCols = input_dims[NumInputDims-4];
|
||||
}
|
||||
|
||||
m_plane_strides = op.plane_strides();
|
||||
m_row_strides = op.row_strides();
|
||||
m_col_strides = op.col_strides();
|
||||
|
||||
// Input strides and effective input/patch size
|
||||
m_in_plane_strides = op.in_plane_strides();
|
||||
m_in_row_strides = op.in_row_strides();
|
||||
m_in_col_strides = op.in_col_strides();
|
||||
m_plane_inflate_strides = op.plane_inflate_strides();
|
||||
m_row_inflate_strides = op.row_inflate_strides();
|
||||
m_col_inflate_strides = op.col_inflate_strides();
|
||||
|
||||
// The "effective" spatial size after inflating data with zeros.
|
||||
m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1;
|
||||
m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1;
|
||||
m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1;
|
||||
m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1);
|
||||
m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1);
|
||||
m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1);
|
||||
|
||||
if (op.padding_explicit()) {
|
||||
m_outputPlanes = numext::ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides));
|
||||
m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
|
||||
m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
|
||||
m_planePaddingTop = op.padding_top_z();
|
||||
m_rowPaddingTop = op.padding_top();
|
||||
m_colPaddingLeft = op.padding_left();
|
||||
} else {
|
||||
// Computing padding from the type
|
||||
switch (op.padding_type()) {
|
||||
case PADDING_VALID:
|
||||
m_outputPlanes = numext::ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast<float>(m_plane_strides));
|
||||
m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast<float>(m_row_strides));
|
||||
m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast<float>(m_col_strides));
|
||||
m_planePaddingTop = 0;
|
||||
m_rowPaddingTop = 0;
|
||||
m_colPaddingLeft = 0;
|
||||
break;
|
||||
case PADDING_SAME: {
|
||||
m_outputPlanes = numext::ceil(m_input_planes_eff / static_cast<float>(m_plane_strides));
|
||||
m_outputRows = numext::ceil(m_input_rows_eff / static_cast<float>(m_row_strides));
|
||||
m_outputCols = numext::ceil(m_input_cols_eff / static_cast<float>(m_col_strides));
|
||||
const Index dz = m_outputPlanes * m_plane_strides + m_patch_planes_eff - 1 - m_input_planes_eff;
|
||||
const Index dy = m_outputRows * m_row_strides + m_patch_rows_eff - 1 - m_input_rows_eff;
|
||||
const Index dx = m_outputCols * m_col_strides + m_patch_cols_eff - 1 - m_input_cols_eff;
|
||||
m_planePaddingTop = dz - dz / 2;
|
||||
m_rowPaddingTop = dy - dy / 2;
|
||||
m_colPaddingLeft = dx - dx / 2;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
eigen_assert(false && "unexpected padding");
|
||||
}
|
||||
}
|
||||
eigen_assert(m_outputRows > 0);
|
||||
eigen_assert(m_outputCols > 0);
|
||||
eigen_assert(m_outputPlanes > 0);
|
||||
|
||||
// Dimensions for result of extraction.
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
// ColMajor
|
||||
// 0: depth
|
||||
// 1: patch_planes
|
||||
// 2: patch_rows
|
||||
// 3: patch_cols
|
||||
// 4: number of patches
|
||||
// 5 and beyond: anything else (such as batch).
|
||||
m_dimensions[0] = input_dims[0];
|
||||
m_dimensions[1] = op.patch_planes();
|
||||
m_dimensions[2] = op.patch_rows();
|
||||
m_dimensions[3] = op.patch_cols();
|
||||
m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols;
|
||||
for (int i = 5; i < NumDims; ++i) {
|
||||
m_dimensions[i] = input_dims[i-1];
|
||||
}
|
||||
} else {
|
||||
// RowMajor
|
||||
// NumDims-1: depth
|
||||
// NumDims-2: patch_planes
|
||||
// NumDims-3: patch_rows
|
||||
// NumDims-4: patch_cols
|
||||
// NumDims-5: number of patches
|
||||
// NumDims-6 and beyond: anything else (such as batch).
|
||||
m_dimensions[NumDims-1] = input_dims[NumInputDims-1];
|
||||
m_dimensions[NumDims-2] = op.patch_planes();
|
||||
m_dimensions[NumDims-3] = op.patch_rows();
|
||||
m_dimensions[NumDims-4] = op.patch_cols();
|
||||
m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols;
|
||||
for (int i = NumDims-6; i >= 0; --i) {
|
||||
m_dimensions[i] = input_dims[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Strides for the output tensor.
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
m_rowStride = m_dimensions[1];
|
||||
m_colStride = m_dimensions[2] * m_rowStride;
|
||||
m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0];
|
||||
m_otherStride = m_patchStride * m_dimensions[4];
|
||||
} else {
|
||||
m_rowStride = m_dimensions[NumDims-2];
|
||||
m_colStride = m_dimensions[NumDims-3] * m_rowStride;
|
||||
m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1];
|
||||
m_otherStride = m_patchStride * m_dimensions[NumDims-5];
|
||||
}
|
||||
|
||||
// Strides for navigating through the input tensor.
|
||||
m_planeInputStride = m_inputDepth;
|
||||
m_rowInputStride = m_inputDepth * m_inputPlanes;
|
||||
m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes;
|
||||
m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes;
|
||||
|
||||
m_outputPlanesRows = m_outputPlanes * m_outputRows;
|
||||
|
||||
// Fast representations of different variables.
|
||||
m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
|
||||
m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
|
||||
m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
|
||||
m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride);
|
||||
m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides);
|
||||
m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides);
|
||||
m_fastInputPlaneStride = internal::TensorIntDivisor<Index>(m_plane_inflate_strides);
|
||||
m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff);
|
||||
m_fastOutputPlanes = internal::TensorIntDivisor<Index>(m_outputPlanes);
|
||||
m_fastOutputPlanesRows = internal::TensorIntDivisor<Index>(m_outputPlanesRows);
|
||||
|
||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||
m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]);
|
||||
} else {
|
||||
m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
|
||||
}
|
||||
}
|
||||
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
|
||||
m_impl.evalSubExprsIfNeeded(NULL);
|
||||
return true;
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||
m_impl.cleanup();
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
// Patch index corresponding to the passed in index.
|
||||
const Index patchIndex = index / m_fastPatchStride;
|
||||
|
||||
// Spatial offset within the patch. This has to be translated into 3D
|
||||
// coordinates within the patch.
|
||||
const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth;
|
||||
|
||||
// Batch, etc.
|
||||
const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride;
|
||||
const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride;
|
||||
|
||||
// Calculate column index in the input original tensor.
|
||||
const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
|
||||
const Index colOffset = patchOffset / m_fastColStride;
|
||||
const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
|
||||
const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
|
||||
if (inputCol < 0 || inputCol >= m_input_cols_eff ||
|
||||
((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
|
||||
return Scalar(m_paddingValue);
|
||||
}
|
||||
|
||||
// Calculate row index in the original input tensor.
|
||||
const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
|
||||
const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride;
|
||||
const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
|
||||
const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
|
||||
if (inputRow < 0 || inputRow >= m_input_rows_eff ||
|
||||
((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
|
||||
return Scalar(m_paddingValue);
|
||||
}
|
||||
|
||||
// Calculate plane index in the original input tensor.
|
||||
const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
|
||||
const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride;
|
||||
const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop;
|
||||
const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0);
|
||||
if (inputPlane < 0 || inputPlane >= m_input_planes_eff ||
|
||||
((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) {
|
||||
return Scalar(m_paddingValue);
|
||||
}
|
||||
|
||||
const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
|
||||
const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
|
||||
|
||||
const Index inputIndex = depth +
|
||||
origInputRow * m_rowInputStride +
|
||||
origInputCol * m_colInputStride +
|
||||
origInputPlane * m_planeInputStride +
|
||||
otherIndex * m_otherInputStride;
|
||||
|
||||
return m_impl.coeff(inputIndex);
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
const Index packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
|
||||
|
||||
if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 ||
|
||||
m_in_plane_strides != 1 || m_plane_inflate_strides != 1) {
|
||||
return packetWithPossibleZero(index);
|
||||
}
|
||||
|
||||
const Index indices[2] = {index, index + packetSize - 1};
|
||||
const Index patchIndex = indices[0] / m_fastPatchStride;
|
||||
if (patchIndex != indices[1] / m_fastPatchStride) {
|
||||
return packetWithPossibleZero(index);
|
||||
}
|
||||
const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride;
|
||||
eigen_assert(otherIndex == indices[1] / m_fastOtherStride);
|
||||
|
||||
// Find the offset of the element wrt the location of the first element.
|
||||
const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth,
|
||||
(indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth};
|
||||
|
||||
const Index patch3DIndex = (NumDims == 5) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
|
||||
eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);
|
||||
|
||||
const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
|
||||
const Index colOffsets[2] = {
|
||||
patchOffsets[0] / m_fastColStride,
|
||||
patchOffsets[1] / m_fastColStride};
|
||||
|
||||
// Calculate col indices in the original input tensor.
|
||||
const Index inputCols[2] = {
|
||||
colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft,
|
||||
colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
|
||||
if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
|
||||
return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
|
||||
}
|
||||
|
||||
if (inputCols[0] != inputCols[1]) {
|
||||
return packetWithPossibleZero(index);
|
||||
}
|
||||
|
||||
const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
|
||||
const Index rowOffsets[2] = {
|
||||
(patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride,
|
||||
(patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride};
|
||||
eigen_assert(rowOffsets[0] <= rowOffsets[1]);
|
||||
// Calculate col indices in the original input tensor.
|
||||
    const Index inputRows[2] = {
        rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop,
        rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};

    if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
    }

    if (inputRows[0] != inputRows[1]) {
      return packetWithPossibleZero(index);
    }

    const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
    const Index planeOffsets[2] = {
        patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride,
        patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride};
    eigen_assert(planeOffsets[0] <= planeOffsets[1]);
    const Index inputPlanes[2] = {
        planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop,
        planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop};

    if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) {
      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
    }

    if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) {
      // no padding
      const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
      const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
      const Index inputIndex = depth +
          inputRows[0] * m_rowInputStride +
          inputCols[0] * m_colInputStride +
          m_planeInputStride * inputPlanes[0] +
          otherIndex * m_otherInputStride;
      return m_impl.template packet<Unaligned>(inputIndex);
    }

    return packetWithPossibleZero(index);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

  Index planePaddingTop() const { return m_planePaddingTop; }
  Index rowPaddingTop() const { return m_rowPaddingTop; }
  Index colPaddingLeft() const { return m_colPaddingLeft; }
  Index outputPlanes() const { return m_outputPlanes; }
  Index outputRows() const { return m_outputRows; }
  Index outputCols() const { return m_outputCols; }
  Index userPlaneStride() const { return m_plane_strides; }
  Index userRowStride() const { return m_row_strides; }
  Index userColStride() const { return m_col_strides; }
  Index userInPlaneStride() const { return m_in_plane_strides; }
  Index userInRowStride() const { return m_in_row_strides; }
  Index userInColStride() const { return m_in_col_strides; }
  Index planeInflateStride() const { return m_plane_inflate_strides; }
  Index rowInflateStride() const { return m_row_inflate_strides; }
  Index colInflateStride() const { return m_col_inflate_strides; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords) const
  {
    // ColMajor
    // 0: depth, 1: patch_planes, 2: patch_rows, 3: patch_cols, 4: number of patches, 5: batches
    // RowMajor
    // 0: batches, 1: number of patches, 2: patch_cols, 3: patch_rows, 4: patch_planes, 5: depth
    const Index patch3DIndex = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 4 : 1];
    const Index colOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 3 : 2];
    const Index rowOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 2 : 3];
    const Index planeOffset = coords[static_cast<int>(Layout) == static_cast<int>(ColMajor) ? 1 : 4];

    array<Index, NumDims-1> inputCoords;

    const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
    const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
    const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
    if (inputCol < 0 || inputCol >= m_input_cols_eff ||
        ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
      return Scalar(m_paddingValue);
    }

    const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
    const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
    const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
    if (inputRow < 0 || inputRow >= m_input_rows_eff ||
        ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
      return Scalar(m_paddingValue);
    }

    const Index planeIndex = patch3DIndex - colIndex * m_outputPlanesRows - rowIndex * m_outputRows;
    const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop;
    const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0);
    if (inputPlane < 0 || inputPlane >= m_input_planes_eff ||
        ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) {
      return Scalar(m_paddingValue);
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      inputCoords[0] = coords[0];  // depth
      inputCoords[1] = origInputPlane;
      inputCoords[2] = origInputRow;
      inputCoords[3] = origInputCol;
      inputCoords[4] = coords[5];  // batch
    } else {
      inputCoords[4] = coords[5];  // depth
      inputCoords[3] = origInputPlane;
      inputCoords[2] = origInputRow;
      inputCoords[1] = origInputCol;
      inputCoords[0] = coords[0];  // batch
    }
    if (TensorEvaluator<ArgType, Device>::CoordAccess) {
      return m_impl.coeff(inputCoords);
    } else {
      Index inputIndex;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        inputIndex =
            inputCoords[4] * m_otherInputStride +
            inputCoords[3] * m_colInputStride +
            inputCoords[2] * m_rowInputStride +
            inputCoords[1] * m_planeInputStride +
            inputCoords[0];
      } else {
        inputIndex =
            inputCoords[0] * m_otherInputStride +
            inputCoords[1] * m_colInputStride +
            inputCoords[2] * m_rowInputStride +
            inputCoords[3] * m_planeInputStride +
            inputCoords[4];
      }
      return m_impl.coeff(inputIndex);
    }
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  Dimensions m_dimensions;

  // Parameters passed to the constructor.
  Index m_plane_strides;
  Index m_row_strides;
  Index m_col_strides;

  Index m_outputPlanes;
  Index m_outputRows;
  Index m_outputCols;

  Index m_planePaddingTop;
  Index m_rowPaddingTop;
  Index m_colPaddingLeft;

  Index m_in_plane_strides;
  Index m_in_row_strides;
  Index m_in_col_strides;

  Index m_plane_inflate_strides;
  Index m_row_inflate_strides;
  Index m_col_inflate_strides;

  // Cached input size.
  Index m_inputDepth;
  Index m_inputPlanes;
  Index m_inputRows;
  Index m_inputCols;

  // Other cached variables.
  Index m_outputPlanesRows;

  // Effective input/patch post-inflation size.
  Index m_input_planes_eff;
  Index m_input_rows_eff;
  Index m_input_cols_eff;
  Index m_patch_planes_eff;
  Index m_patch_rows_eff;
  Index m_patch_cols_eff;

  // Strides for the output tensor.
  Index m_otherStride;
  Index m_patchStride;
  Index m_rowStride;
  Index m_colStride;

  // Strides for the input tensor.
  Index m_planeInputStride;
  Index m_rowInputStride;
  Index m_colInputStride;
  Index m_otherInputStride;

  internal::TensorIntDivisor<Index> m_fastOtherStride;
  internal::TensorIntDivisor<Index> m_fastPatchStride;
  internal::TensorIntDivisor<Index> m_fastColStride;
  internal::TensorIntDivisor<Index> m_fastRowStride;
  internal::TensorIntDivisor<Index> m_fastInputPlaneStride;
  internal::TensorIntDivisor<Index> m_fastInputRowStride;
  internal::TensorIntDivisor<Index> m_fastInputColStride;
  internal::TensorIntDivisor<Index> m_fastInputColsEff;
  internal::TensorIntDivisor<Index> m_fastOutputPlanesRows;
  internal::TensorIntDivisor<Index> m_fastOutputPlanes;
  internal::TensorIntDivisor<Index> m_fastOutputDepth;

  Scalar m_paddingValue;

  TensorEvaluator<ArgType, Device> m_impl;
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
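A minimal usage sketch of the ColMajor output layout documented in coeff(coords) above (depth, patch_planes, patch_rows, patch_cols, number of patches, batch); the input sizes here are arbitrary, and the call assumes the stride-1, same-padding overload exercised by the tests added below in this commit:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      // ColMajor input: depth=4, planes=6, rows=5, cols=7, batch=2 (arbitrary sizes).
      Eigen::Tensor<float, 5> input(4, 6, 5, 7, 2);
      input.setRandom();

      // Extract 2x3x3 volume patches (patch_planes, patch_rows, patch_cols).
      // With unit strides and same padding, the number of patches equals
      // planes * rows * cols of the input.
      Eigen::Tensor<float, 6> patches = input.extract_volume_patches(2, 3, 3);

      std::cout << patches.dimension(0) << "\n"   // 4      (depth)
                << patches.dimension(1) << "\n"   // 2      (patch_planes)
                << patches.dimension(2) << "\n"   // 3      (patch_rows)
                << patches.dimension(3) << "\n"   // 3      (patch_cols)
                << patches.dimension(4) << "\n"   // 6*5*7  (number of patches)
                << patches.dimension(5) << "\n";  // 2      (batch)
      return 0;
    }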
@ -125,6 +125,7 @@ if(EIGEN_TEST_CXX11)
  ei_add_test(cxx11_tensor_padding "-std=c++0x")
  ei_add_test(cxx11_tensor_patch "-std=c++0x")
  ei_add_test(cxx11_tensor_image_patch "-std=c++0x")
  ei_add_test(cxx11_tensor_volume_patch "-std=c++0x")
  ei_add_test(cxx11_tensor_reduction "-std=c++0x")
  ei_add_test(cxx11_tensor_shuffling "-std=c++0x")
  ei_add_test(cxx11_tensor_striding "-std=c++0x")
@ -136,10 +137,12 @@ if(EIGEN_TEST_CXX11)
  ei_add_test(cxx11_tensor_layout_swap "-std=c++0x")
  ei_add_test(cxx11_tensor_io "-std=c++0x")
  ei_add_test(cxx11_tensor_generator "-std=c++0x")
  ei_add_test(cxx11_tensor_custom_op "-std=c++0x")

  # These tests need nvcc
  # ei_add_test(cxx11_tensor_device "-std=c++0x")
  # ei_add_test(cxx11_tensor_cuda "-std=c++0x")
  # ei_add_test(cxx11_tensor_contract_cuda "-std=c++0x")
  # ei_add_test(cxx11_tensor_reduction_cuda "-std=c++0x")

endif()

@ -354,7 +354,3 @@ void test_cxx11_meta()
  CALL_SUBTEST(test_array_zip_and_apply());
  CALL_SUBTEST(test_array_misc());
}

/*
 * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle;
 */
107
unsupported/test/cxx11_tensor_custom_op.cpp
Normal file
@ -0,0 +1,107 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include "main.h"

#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;


struct InsertZeros {
  DSizes<DenseIndex, 2> dimensions(const Tensor<float, 2>& input) const {
    DSizes<DenseIndex, 2> result;
    result[0] = input.dimension(0) * 2;
    result[1] = input.dimension(1) * 2;
    return result;
  }

  template <typename Output, typename Device>
  void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const
  {
    array<DenseIndex, 2> strides{{2, 2}};
    output.stride(strides).device(device) = input;

    Eigen::DSizes<DenseIndex, 2> offsets(1,1);
    Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0)-1, output.dimension(1)-1);
    output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
  }
};

static void test_custom_unary_op()
{
  Tensor<float, 2> tensor(3,5);
  tensor.setRandom();

  Tensor<float, 2> result = tensor.customOp(InsertZeros());
  VERIFY_IS_EQUAL(result.dimension(0), 6);
  VERIFY_IS_EQUAL(result.dimension(1), 10);

  for (int i = 0; i < 6; i+=2) {
    for (int j = 0; j < 10; j+=2) {
      VERIFY_IS_EQUAL(result(i, j), tensor(i/2, j/2));
    }
  }
  for (int i = 1; i < 6; i+=2) {
    for (int j = 1; j < 10; j+=2) {
      VERIFY_IS_EQUAL(result(i, j), 0);
    }
  }
}


struct BatchMatMul {
  DSizes<DenseIndex, 3> dimensions(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2) const {
    DSizes<DenseIndex, 3> result;
    result[0] = input1.dimension(0);
    result[1] = input2.dimension(1);
    result[2] = input2.dimension(2);
    return result;
  }

  template <typename Output, typename Device>
  void eval(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2,
            Output& output, const Device& device) const
  {
    typedef Tensor<float, 3>::DimensionPair DimPair;
    array<DimPair, 1> dims({{DimPair(1, 0)}});
    for (int i = 0; i < output.dimension(2); ++i) {
      output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims);
    }
  }
};


static void test_custom_binary_op()
{
  Tensor<float, 3> tensor1(2,3,5);
  tensor1.setRandom();
  Tensor<float, 3> tensor2(3,7,5);
  tensor2.setRandom();

  Tensor<float, 3> result = tensor1.customOp(tensor2, BatchMatMul());
  for (int i = 0; i < 5; ++i) {
    typedef Tensor<float, 3>::DimensionPair DimPair;
    array<DimPair, 1> dims({{DimPair(1, 0)}});
    Tensor<float, 2> reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims);
    TensorRef<Tensor<float, 2>> val = result.chip<2>(i);
    for (int j = 0; j < 2; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(val(j, k), reference(j, k));
      }
    }
  }
}


void test_cxx11_tensor_custom_op()
{
  CALL_SUBTEST(test_custom_unary_op());
  CALL_SUBTEST(test_custom_binary_op());
}
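For comparison, a minimal sketch of another unary functor following the same protocol as InsertZeros above: dimensions() reports the output shape and eval() fills the output on the given device. The functor name DoubleValues and the standalone main() are illustrative assumptions, not part of this test:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>
    #include <cmath>

    using Eigen::Tensor;
    using Eigen::DSizes;
    using Eigen::DenseIndex;

    // Hypothetical unary custom op: doubles every coefficient, same shape as the input.
    struct DoubleValues {
      DSizes<DenseIndex, 2> dimensions(const Tensor<float, 2>& input) const {
        DSizes<DenseIndex, 2> result;
        result[0] = input.dimension(0);
        result[1] = input.dimension(1);
        return result;
      }

      template <typename Output, typename Device>
      void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const {
        output.device(device) = input * 2.0f;
      }
    };

    int main() {
      Tensor<float, 2> tensor(3, 5);
      tensor.setRandom();

      Tensor<float, 2> result = tensor.customOp(DoubleValues());
      for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 5; ++j)
          assert(std::fabs(result(i, j) - 2.0f * tensor(i, j)) < 1e-6f);
      return 0;
    }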
55
unsupported/test/cxx11_tensor_reduction_cuda.cpp
Normal file
@ -0,0 +1,55 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda
#define EIGEN_USE_GPU

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>


template<int DataLayout>
static void test_full_reductions() {

  Eigen::GpuDevice gpu_device;

  const int num_rows = internal::random<int>(1024, 5*1024);
  const int num_cols = internal::random<int>(1024, 5*1024);

  Tensor<float, 2, DataLayout> in(num_rows, num_cols);
  in.setRandom();

  Tensor<float, 1, DataLayout> full_redux(1);
  full_redux = in.sum();

  std::size_t in_bytes = in.size() * sizeof(float);
  std::size_t out_bytes = full_redux.size() * sizeof(float);
  float* gpu_in_ptr = static_cast<float*>(gpu_device.allocate(in_bytes));
  float* gpu_out_ptr = static_cast<float*>(gpu_device.allocate(out_bytes));
  gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);

  TensorMap<Tensor<float, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
  TensorMap<Tensor<float, 1, DataLayout> > out_gpu(gpu_out_ptr, 1);

  out_gpu.device(gpu_device) = in_gpu.sum();

  Tensor<float, 1, DataLayout> full_redux_gpu(1);
  gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
  gpu_device.synchronize();

  // Check that the CPU and GPU reductions return the same result.
  VERIFY_IS_APPROX(full_redux(0), full_redux_gpu(0));
}

void test_cxx11_tensor_reduction_cuda() {
  CALL_SUBTEST(test_full_reductions<ColMajor>());
  CALL_SUBTEST(test_full_reductions<RowMajor>());
}
@ -228,6 +228,29 @@ static void test_multithread_contraction_agrees_with_singlethread() {
}


template<int DataLayout>
static void test_multithreaded_reductions() {
  const int num_threads = internal::random<int>(3, 11);
  ThreadPool thread_pool(num_threads);
  Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);

  const int num_rows = internal::random<int>(13, 732);
  const int num_cols = internal::random<int>(13, 732);
  Tensor<float, 2, DataLayout> t1(num_rows, num_cols);
  t1.setRandom();

  Tensor<float, 1, DataLayout> full_redux(1);
  full_redux = t1.sum();

  Tensor<float, 1, DataLayout> full_redux_tp(1);
  full_redux_tp.device(thread_pool_device) = t1.sum();

  // Check that the single threaded and the multi threaded reductions return
  // the same result.
  VERIFY_IS_APPROX(full_redux(0), full_redux_tp(0));
}
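A minimal CPU-side sketch, assuming the sum(dims) overload that reduces over a subset of dimensions, showing a per-column partial reduction; the tensor sizes are arbitrary, and the same .device(thread_pool_device) assignment used above would evaluate it on the thread pool:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<float, 2> t(128, 64);  // arbitrary sizes
      t.setRandom();

      // Reduce over dimension 0 (the rows), keeping dimension 1 (the columns).
      Eigen::array<ptrdiff_t, 1> reduce_rows{{0}};
      Eigen::Tensor<float, 1> col_sums = t.sum(reduce_rows);

      std::cout << col_sums.dimension(0) << std::endl;  // 64
      return 0;
    }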

static void test_memcpy() {

  for (int i = 0; i < 5; ++i) {
@ -271,6 +294,9 @@ void test_cxx11_tensor_thread_pool()
  CALL_SUBTEST(test_contraction_corner_cases<ColMajor>());
  CALL_SUBTEST(test_contraction_corner_cases<RowMajor>());

  CALL_SUBTEST(test_multithreaded_reductions<ColMajor>());
  CALL_SUBTEST(test_multithreaded_reductions<RowMajor>());

  CALL_SUBTEST(test_memcpy());

  CALL_SUBTEST(test_multithread_random());
112
unsupported/test/cxx11_tensor_volume_patch.cpp
Normal file
@ -0,0 +1,112 @@
#include "main.h"

#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;

static void test_single_voxel_patch()
{
  Tensor<float, 5> tensor(4,2,3,5,7);
  tensor.setRandom();
  Tensor<float, 5, RowMajor> tensor_row_major = tensor.swap_layout();

  Tensor<float, 6> single_voxel_patch;
  single_voxel_patch = tensor.extract_volume_patches(1, 1, 1);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(0), 4);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(1), 1);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(2), 1);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(3), 1);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(4), 2 * 3 * 5);
  VERIFY_IS_EQUAL(single_voxel_patch.dimension(5), 7);

  Tensor<float, 6, RowMajor> single_voxel_patch_row_major;
  single_voxel_patch_row_major = tensor_row_major.extract_volume_patches(1, 1, 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(0), 7);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(1), 2 * 3 * 5);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(2), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(3), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(4), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(5), 4);

  for (int i = 0; i < tensor.size(); ++i) {
    VERIFY_IS_EQUAL(tensor.data()[i], single_voxel_patch.data()[i]);
    VERIFY_IS_EQUAL(tensor_row_major.data()[i], single_voxel_patch_row_major.data()[i]);
    VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]);
  }
}


static void test_entire_volume_patch()
{
  const int depth = 4;
  const int patch_z = 2;
  const int patch_y = 3;
  const int patch_x = 5;
  const int batch = 7;

  Tensor<float, 5> tensor(depth, patch_z, patch_y, patch_x, batch);
  tensor.setRandom();
  Tensor<float, 5, RowMajor> tensor_row_major = tensor.swap_layout();

  Tensor<float, 6> entire_volume_patch;
  entire_volume_patch = tensor.extract_volume_patches(patch_z, patch_y, patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(0), depth);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(1), patch_z);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(2), patch_y);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(3), patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(4), patch_z * patch_y * patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch.dimension(5), batch);

  Tensor<float, 6, RowMajor> entire_volume_patch_row_major;
  entire_volume_patch_row_major = tensor_row_major.extract_volume_patches(patch_z, patch_y, patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(0), batch);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(2), patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(3), patch_y);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(4), patch_z);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(5), depth);

  const int dz = patch_z - 1;
  const int dy = patch_y - 1;
  const int dx = patch_x - 1;

  const int forward_pad_z = dz - dz / 2;
  const int forward_pad_y = dy - dy / 2;
  const int forward_pad_x = dx - dx / 2;

  for (int pz = 0; pz < patch_z; pz++) {
    for (int py = 0; py < patch_y; py++) {
      for (int px = 0; px < patch_x; px++) {
        const int patchId = pz + patch_z * (py + px * patch_y);
        for (int z = 0; z < patch_z; z++) {
          for (int y = 0; y < patch_y; y++) {
            for (int x = 0; x < patch_x; x++) {
              for (int b = 0; b < batch; b++) {
                for (int d = 0; d < depth; d++) {
                  float expected = 0.0f;
                  float expected_row_major = 0.0f;
                  const int eff_z = z - forward_pad_z + pz;
                  const int eff_y = y - forward_pad_y + py;
                  const int eff_x = x - forward_pad_x + px;
                  if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 &&
                      eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) {
                    expected = tensor(d, eff_z, eff_y, eff_x, b);
                    expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d);
                  }
                  VERIFY_IS_EQUAL(entire_volume_patch(d, z, y, x, patchId, b), expected);
                  VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major);
                }
              }
            }
          }
        }
      }
    }
  }
}

void test_cxx11_tensor_volume_patch()
{
  CALL_SUBTEST(test_single_voxel_patch());
  CALL_SUBTEST(test_entire_volume_patch());
}
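A note on the padding arithmetic in test_entire_volume_patch above: with same padding and unit strides, a patch of size p implies p - 1 padded entries per dimension in total, split as forward_pad = (p - 1) - (p - 1) / 2 before the data and (p - 1) / 2 after it. For example, patch_x = 5 gives dx = 4 and forward_pad_x = 4 - 2 = 2, i.e. two padded columns before and two after the valid region.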