diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 82c12076e..151c05526 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -83,22 +83,10 @@ template class ArrayBase #endif // not EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily - * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const - * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either - * PlainObject or const PlainObject&. - */ - typedef Array::Scalar, - internal::traits::RowsAtCompileTime, - internal::traits::ColsAtCompileTime, - AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), - internal::traits::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime - > PlainObject; - + typedef typename Base::PlainObject PlainObject; /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 4622e2759..97938fdba 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -756,6 +756,26 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src) call_assignment_no_alias(dst, src, internal::assign_op()); } +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) +{ + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) + + Assignment::run(dst, src, func); +} +template +EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) +{ + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); +} + // forward declaration template void check_for_aliasing(const Dst &dst, const Src &src); @@ -783,7 +803,6 @@ struct Assignment EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - src.evalTo(dst); } }; diff --git a/Eigen/src/Core/Assign_MKL.h b/Eigen/src/Core/Assign_MKL.h old mode 100644 new mode 100755 index d4ef1d3a4..897187a30 --- a/Eigen/src/Core/Assign_MKL.h +++ b/Eigen/src/Core/Assign_MKL.h @@ -1,6 +1,7 @@ /* Copyright (c) 2011, Intel Corporation. All rights reserved. - + Copyright (C) 2015 Gael Guennebaud + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -37,17 +38,13 @@ namespace Eigen { namespace internal { -template struct vml_call -{ enum { IsSupported = 0 }; }; - -template +template class vml_assign_traits { private: enum { DstHasDirectAccess = Dst::Flags & DirectAccessBit, SrcHasDirectAccess = Src::Flags & DirectAccessBit, - StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) : int(Dst::Flags)&RowMajorBit ? 
int(Dst::ColsAtCompileTime) @@ -57,173 +54,118 @@ class vml_assign_traits : int(Dst::MaxRowsAtCompileTime), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, - MightEnableVml = vml_call::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess - && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, - LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, - MayEnableVml = MightEnableVml && LargeEnough, - MayLinearize = MayEnableVml && MightLinearize + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD }; public: enum { - Traversal = MayLinearize ? LinearVectorizedTraversal - : MayEnableVml ? InnerVectorizedTraversal - : DefaultTraversal + EnableVml = MightEnableVml && LargeEnough, + Traversal = MightLinearize ? LinearTraversal : DefaultTraversal }; }; -template::Traversal > -struct vml_assign_impl - : assign_impl,Traversal,Unrolling,BuiltIn> -{ -}; - -template -struct vml_assign_impl -{ - typedef typename Derived1::Scalar Scalar; - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) { - const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : - &(src.nestedExpression().coeffRef(0, outer)); - Scalar *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); - vml_call::run(src.functor(), innerSize, src_ptr, dst_ptr ); - } - } -}; - -template -struct vml_assign_impl -{ - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - vml_call::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); - } -}; - -// Macroses - -#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ - template \ - struct assign_impl, TRAVERSAL, UNROLLING, Specialized> { \ - static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp &src) { \ - vml_assign_impl::run(dst, src); \ - } \ - }; - -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) - - +#define EIGEN_PP_EXPAND(ARG) ARG #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) -#define EIGEN_MKL_VML_MODE VML_HA +#define EIGEN_VMLMODE_EXPAND_LA , VML_HA #else -#define EIGEN_MKL_VML_MODE VML_LA +#define EIGEN_VMLMODE_EXPAND_LA , VML_LA #endif -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ - } \ +#define EIGEN_VMLMODE_EXPAND__ + +#define EIGEN_VMLMODE_PREFIX_LA vm +#define EIGEN_VMLMODE_PREFIX__ v +#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml,EIGENTYPE>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + if(vml_assign_traits::Traversal==LinearTraversal) { \ + VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ + }; \ + + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA) +// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml,EIGENTYPE>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + VMLTYPE exponent = reinterpret_cast(src.functor().m_exponent); \ + if(vml_assign_traits::Traversal==LinearTraversal) \ + { \ + VMLOP( dst.size(), (const VMLTYPE*)src.nestedExpression().data(), exponent, \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& func, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - EIGENTYPE exponent = func.m_exponent; \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ - (VMLTYPE*)dst, &vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) - - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) - - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sinh, Sinh) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cosh, Cosh) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(atan, Atan) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tanh, Tanh) -//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log10, Log10) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt) - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(arg, Arg) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil) - -// The vm*powx functions are not avaibale in the windows version of MKL. 
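A minimal usage sketch of the VML dispatch declared above, assuming an MKL-enabled build in which EIGEN_USE_MKL_ALL (or EIGEN_USE_MKL_VML) is defined before including Eigen; the array size and the exp/sqrt/pow calls are illustrative only and not part of the patch:

#include <Eigen/Dense>   // assumes EIGEN_USE_MKL_ALL was defined beforehand

int main()
{
  Eigen::ArrayXd x = Eigen::ArrayXd::Random(10000);
  Eigen::ArrayXd y(10000);
  y = x.exp();     // expected to be routed to vmdExp (VML_LA or VML_HA depending on EIGEN_FAST_MATH)
  y = x.sqrt();    // sqrt is declared with the default "_" accuracy mode, i.e. plain vdSqrt
  y = x.pow(2.5);  // expected to be routed to the vmdPowx specialization declared above
  return 0;
}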
-#ifndef _WIN32 -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) -#endif + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA) } // end namespace internal diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 87ea14bac..395861c0c 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -113,10 +113,10 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) { - return CwiseNullaryOp(rows, cols, func); + return CwiseNullaryOp(rows, cols, func); } /** \returns an expression of a matrix defined by a custom functor \a func @@ -139,12 +139,12 @@ DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); - else return CwiseNullaryOp(size, 1, func); + if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); + else return CwiseNullaryOp(size, 1, func); } /** \returns an expression of a matrix defined by a custom functor \a func @@ -158,10 +158,10 @@ DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(const CustomNullaryOp& func) { - return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); + return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); } /** \returns an expression of a constant matrix of value \a value diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 4e03e4a56..d486f3fd5 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -49,6 +49,8 @@ template class DenseBase public: using internal::special_scalar_op_base::Scalar, typename NumTraits::Scalar>::Real>::operator*; + using internal::special_scalar_op_base::Scalar, + typename NumTraits::Scalar>::Real>::operator/; /** Inner iterator type to iterate over the coefficients of a row or column. @@ -178,6 +180,35 @@ template class DenseBase }; enum { IsPlainObjectBase = 0 }; + + /** The plain matrix type corresponding to this expression. + * \sa PlainObject */ + typedef Matrix::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainMatrix; + + /** The plain array type corresponding to this expression. + * \sa PlainObject */ + typedef Array::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? 
RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainArray; + + /** \brief The plain matrix or array type corresponding to this expression. + * + * This is not necessarily exactly the return type of eval(). In the case of plain matrices, + * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed + * that the return type of eval() is either PlainObject or const PlainObject&. + */ + typedef typename internal::conditional::XprKind,MatrixXpr >::value, + PlainMatrix, PlainArray>::type PlainObject; /** \returns the number of nonzero coefficients which is in practice the number * of stored coefficients. */ @@ -237,13 +268,12 @@ template class DenseBase } #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */ - typedef CwiseNullaryOp,Derived> SequentialLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ - typedef CwiseNullaryOp,Derived> RandomAccessLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; @@ -322,13 +352,13 @@ template class DenseBase LinSpaced(const Scalar& low, const Scalar& high); template EIGEN_DEVICE_FUNC - static const CwiseNullaryOp + static const CwiseNullaryOp NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func); template EIGEN_DEVICE_FUNC - static const CwiseNullaryOp + static const CwiseNullaryOp NullaryExpr(Index size, const CustomNullaryOp& func); template EIGEN_DEVICE_FUNC - static const CwiseNullaryOp + static const CwiseNullaryOp NullaryExpr(const CustomNullaryOp& func); EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols); @@ -466,9 +496,10 @@ template class DenseBase ConstColwiseReturnType colwise() const; ColwiseReturnType colwise(); - static const CwiseNullaryOp,Derived> Random(Index rows, Index cols); - static const CwiseNullaryOp,Derived> Random(Index size); - static const CwiseNullaryOp,Derived> Random(); + typedef CwiseNullaryOp,PlainObject> RandomReturnType; + static const RandomReturnType Random(Index rows, Index cols); + static const RandomReturnType Random(Index size); + static const RandomReturnType Random(); template const Select diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index dfc651908..408f76b1a 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -349,6 +349,7 @@ struct hypot_retval template struct cast_impl { + EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) { return static_cast(x); @@ -360,6 +361,7 @@ struct cast_impl template inline NewType cast(const OldType& x) { + EIGEN_DEVICE_FUNC return cast_impl::run(x); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 1686933e4..8549cf83c 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -81,6 +81,7 @@ template class MatrixBase using Base::operator*=; using Base::operator/=; using Base::operator*; + using Base::operator/; typedef typename 
Base::CoeffReturnType CoeffReturnType; typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType; @@ -101,23 +102,11 @@ template class MatrixBase EIGEN_DEVICE_FUNC inline Index diagonalSize() const { return (std::min)(rows(),cols()); } - /** \brief The plain matrix type corresponding to this expression. - * - * This is not necessarily exactly the return type of eval(). In the case of plain matrices, - * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed - * that the return type of eval() is either PlainObject or const PlainObject&. - */ - typedef Matrix::Scalar, - internal::traits::RowsAtCompileTime, - internal::traits::ColsAtCompileTime, - AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), - internal::traits::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime - > PlainObject; + typedef typename Base::PlainObject PlainObject; #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal the return type of MatrixBase::adjoint() */ typedef typename internal::conditional::IsComplex, CwiseUnaryOp, ConstTransposeReturnType>, @@ -126,7 +115,7 @@ template class MatrixBase /** \internal Return type of eigenvalues() */ typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; /** \internal the return type of identity */ - typedef CwiseNullaryOp,Derived> IdentityReturnType; + typedef CwiseNullaryOp,PlainObject> IdentityReturnType; /** \internal the return type of unit vectors */ typedef Block, SquareMatrixType>, internal::traits::RowsAtCompileTime, diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h old mode 100644 new mode 100755 index 2f199bc29..43ba86193 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -751,7 +751,6 @@ struct product_evaluator, ProductTag, DiagonalSha using Base::m_diagImpl; using Base::m_matImpl; using Base::coeff; - using Base::packet_impl; typedef typename Base::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; @@ -776,7 +775,8 @@ struct product_evaluator, ProductTag, DiagonalSha template EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const { - // NVCC complains about template keyword, so we disable this function in CUDA mode + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. 
return this->template packet_impl(row,col, row, typename internal::conditional::type()); } @@ -798,7 +798,6 @@ struct product_evaluator, ProductTag, DenseShape, using Base::m_diagImpl; using Base::m_matImpl; using Base::coeff; - using Base::packet_impl; typedef typename Base::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; diff --git a/Eigen/src/Core/Random.h b/Eigen/src/Core/Random.h index cf2a82877..02038e9e3 100644 --- a/Eigen/src/Core/Random.h +++ b/Eigen/src/Core/Random.h @@ -53,7 +53,7 @@ struct functor_traits > * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index rows, Index cols) { return NullaryExpr(rows, cols, internal::scalar_random_op()); @@ -84,7 +84,7 @@ DenseBase::Random(Index rows, Index cols) * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random() */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index size) { return NullaryExpr(size, internal::scalar_random_op()); @@ -110,7 +110,7 @@ DenseBase::Random(Index size) * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index) */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random() { return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op()); diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index 993458aa5..ef301e66d 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -70,10 +70,6 @@ template class Reverse typedef typename internal::remove_all::type NestedExpression; using Base::IsRowMajor; - // next line is necessary because otherwise const version of operator() - // is hidden by non-const version defined in this file - using Base::operator(); - protected: enum { PacketSize = internal::packet_traits::size, @@ -101,69 +97,6 @@ template class Reverse return -m_matrix.innerStride(); } - EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col) - { - eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return coeffRef(row, col); - } - - EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, - ReverseCol ? m_matrix.cols() - col - 1 : col); - } - - EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index col) const - { - return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, - ReverseCol ? m_matrix.cols() - col - 1 : col); - } - - EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index index) const - { - return m_matrix.coeff(m_matrix.size() - index - 1); - } - - EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) - { - return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); - } - - EIGEN_DEVICE_FUNC inline Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < m_matrix.size()); - return coeffRef(index); - } - - template - inline const PacketScalar packet(Index row, Index col) const - { - return reverse_packet::run(m_matrix.template packet( - ReverseRow ? m_matrix.rows() - row - OffsetRow : row, - ReverseCol ? 
m_matrix.cols() - col - OffsetCol : col)); - } - - template - inline void writePacket(Index row, Index col, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket( - ReverseRow ? m_matrix.rows() - row - OffsetRow : row, - ReverseCol ? m_matrix.cols() - col - OffsetCol : col, - reverse_packet::run(x)); - } - - template - inline const PacketScalar packet(Index index) const - { - return internal::preverse(m_matrix.template packet( m_matrix.size() - index - PacketSize )); - } - - template - inline void writePacket(Index index, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(m_matrix.size() - index - PacketSize, internal::preverse(x)); - } - EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index 83a973365..f9cd01b7e 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -157,6 +157,7 @@ inline typename NumTraits::Scalar>::Real MatrixBase::stableNorm() const { using std::sqrt; + using std::abs; const Index blockSize = 4096; RealScalar scale(0); RealScalar invScale(1); @@ -164,12 +165,18 @@ MatrixBase::stableNorm() const enum { Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0 }; + typedef typename internal::conditional, Aligned>, + typename Base::ConstSegmentReturnType>::type SegmentWrapper; Index n = size(); + + if(n==1) + return abs(this->coeff(0)); + Index bi = internal::first_aligned(derived()); if (bi>0) internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale); for (; bisegment(bi,numext::mini(blockSize, n - bi)).template forceAlignedAccessIf(), ssq, scale, invScale); + internal::stable_norm_kernel(SegmentWrapper(this->segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale); return scale * sqrt(ssq); } diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 85e605889..5dee6247e 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -392,6 +392,18 @@ template struct functor_traits > { enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; +template +struct scalar_quotient2_op { + typedef typename scalar_product_traits::ReturnType result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; } + typename add_const_on_value_type::Nested>::type m_other; +}; +template +struct functor_traits > +{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; + // In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication // where the mixing of different types is handled by scalar_product_traits // In particular, real * complex is allowed. 
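The scalar_quotient2_op functor just added backs the expression-level operator/ imported earlier in this patch (the `using Base::operator/` lines in DenseBase and MatrixBase, with the operator itself defined in special_scalar_op_base further below); a rough usage sketch, with sizes and values chosen for illustration only:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXcd m = Eigen::MatrixXcd::Random(4,4);
  double s = 2.0;
  // Multiplying a complex expression by a real scalar already went through
  // scalar_multiple2_op; scalar_quotient2_op gives division the same treatment,
  // dividing each complex coefficient by the real scalar directly instead of
  // first promoting the scalar to std::complex<double>.
  Eigen::MatrixXcd a = m * s;
  Eigen::MatrixXcd b = m / s;
  return 0;
}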
diff --git a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h old mode 100644 new mode 100755 index 1cb9fe6b5..12c3d13bd --- a/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +++ b/Eigen/src/Core/products/GeneralMatrixVector_MKL.h @@ -46,38 +46,37 @@ namespace internal { // gemv specialization -template -struct general_matrix_vector_product_gemv : - general_matrix_vector_product {}; +template +struct general_matrix_vector_product_gemv; #define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ template \ -struct general_matrix_vector_product { \ +struct general_matrix_vector_product,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper,ConjugateRhs,Specialized> { \ static void run( \ Index rows, Index cols, \ - const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsIncr, \ + const const_blas_data_mapper &lhs, \ + const const_blas_data_mapper &rhs, \ Scalar* res, Index resIncr, Scalar alpha) \ { \ if (ConjugateLhs) { \ - general_matrix_vector_product::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + general_matrix_vector_product,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper,ConjugateRhs,BuiltIn>::run( \ + rows, cols, lhs, rhs, res, resIncr, alpha); \ } else { \ general_matrix_vector_product_gemv::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \ } \ } \ }; \ template \ -struct general_matrix_vector_product { \ +struct general_matrix_vector_product,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper,ConjugateRhs,Specialized> { \ static void run( \ Index rows, Index cols, \ - const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsIncr, \ + const const_blas_data_mapper &lhs, \ + const const_blas_data_mapper &rhs, \ Scalar* res, Index resIncr, Scalar alpha) \ { \ general_matrix_vector_product_gemv::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ + rows, cols, lhs.data(), lhs.stride(), rhs.data(), rhs.stride(), res, resIncr, alpha); \ } \ }; \ diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h old mode 100644 new mode 100755 index 4cc56a42f..d9e7cf852 --- a/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h @@ -122,7 +122,7 @@ struct product_triangular_matrix_matrix_trmm > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ MatrixLhs aa_tmp=lhsMap.template triangularView(); \ MKL_INT aStride = aa_tmp.outerStride(); \ - gemm_blocking_space gemm_blocking(_rows,_cols,_depth); \ + gemm_blocking_space gemm_blocking(_rows,_cols,_depth, 1, true); \ general_matrix_matrix_product::run( \ rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \ \ @@ -236,7 +236,7 @@ struct product_triangular_matrix_matrix_trmm > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ MatrixRhs aa_tmp=rhsMap.template triangularView(); \ MKL_INT aStride = aa_tmp.outerStride(); \ - gemm_blocking_space gemm_blocking(_rows,_cols,_depth); \ + gemm_blocking_space gemm_blocking(_rows,_cols,_depth, 1, true); \ general_matrix_matrix_product::run( \ rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \ \ diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h old mode 100644 new mode 100755 index ffeb5ac5f..934948ebd --- a/Eigen/src/Core/util/BlasUtil.h 
+++ b/Eigen/src/Core/util/BlasUtil.h @@ -224,6 +224,7 @@ class blas_data_mapper { } const Index stride() const { return m_stride; } + const Scalar* data() const { return m_data; } Index firstAligned(Index size) const { if (size_t(m_data)%sizeof(Scalar)) { diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index ba75d25af..7c20fed5e 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -213,6 +213,7 @@ template struct scalar_identity_op; template struct scalar_product_op; template struct scalar_multiple2_op; template struct scalar_quotient_op; +template struct scalar_quotient2_op; } // end namespace internal diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 8efe2f4ca..8cffb3f89 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -645,7 +645,7 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1800 // for older MSVC versions using the base operator is sufficient (cf Bug 1000) +#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 2cd78576e..8c280432b 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -427,7 +427,9 @@ struct special_scalar_op_base : public DenseCoeffsBase { // dummy operator* so that the // "using special_scalar_op_base::operator*" compiles - void operator*() const; + struct dummy {}; + void operator*(dummy) const; + void operator/(dummy) const; }; template @@ -451,6 +453,16 @@ struct special_scalar_op_base : public DenseCo #endif return static_cast(matrix).operator*(scalar); } + + const CwiseUnaryOp, Derived> + operator/(const OtherScalar& scalar) const + { +#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN + EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN +#endif + return CwiseUnaryOp, Derived> + (*static_cast(this), scalar_quotient2_op(scalar)); + } }; template struct cast_return_type diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h index e46c16391..872866850 100644 --- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h @@ -486,10 +486,11 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag while (end>0) { + EIGEN_ASM_COMMENT("beginabs"); for (Index i = start; i0 && subdiag[end-1]==0) { diff --git a/Eigen/src/Eigenvalues/Tridiagonalization.h b/Eigen/src/Eigenvalues/Tridiagonalization.h index a6fb00b21..f21331312 100644 --- a/Eigen/src/Eigenvalues/Tridiagonalization.h +++ b/Eigen/src/Eigenvalues/Tridiagonalization.h @@ -464,9 +464,10 @@ struct tridiagonalization_inplace_selector static void run(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ) { using std::sqrt; + const RealScalar tol = (std::numeric_limits::min)(); diag[0] = mat(0,0); RealScalar v1norm2 = numext::abs2(mat(2,0)); - if(v1norm2 == RealScalar(0)) + if(v1norm2 <= tol) { diag[1] = mat(1,1); diag[2] = mat(2,2); diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h index 6b2e57392..39b64b869 100644 --- a/Eigen/src/Geometry/OrthoMethods.h +++ 
b/Eigen/src/Geometry/OrthoMethods.h @@ -18,6 +18,10 @@ namespace Eigen { * \returns the cross product of \c *this and \a other * * Here is a very good explanation of cross-product: http://xkcd.com/199/ + * + * With complex numbers, the cross product is implemented as + * \f$ (\mathbf{a}+i\mathbf{b}) \times (\mathbf{c}+i\mathbf{d}) = (\mathbf{a} \times \mathbf{c} - \mathbf{b} \times \mathbf{d}) - i(\mathbf{a} \times \mathbf{d} - \mathbf{b} \times \mathbf{c})\f$ + * * \sa MatrixBase::cross3() */ template diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h index 32112af9b..4c1f499a1 100644 --- a/Eigen/src/Householder/Householder.h +++ b/Eigen/src/Householder/Householder.h @@ -75,8 +75,9 @@ void MatrixBase::makeHouseholder( RealScalar tailSqNorm = size()==1 ? RealScalar(0) : tail.squaredNorm(); Scalar c0 = coeff(0); + const RealScalar tol = (std::numeric_limits::min)(); - if(tailSqNorm == RealScalar(0) && numext::imag(c0)==RealScalar(0)) + if(tailSqNorm <= tol && numext::abs2(numext::imag(c0))<=tol) { tau = RealScalar(0); beta = numext::real(c0); diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index be98993f0..a34ee7628 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -136,6 +136,12 @@ struct traits > * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations * and NumTraits::epsilon() for the tolerance. * + * The tolerance corresponds to the relative residual error: |Ax-b|/|b| + * + * \b Performance: when using sparse matrices, best performance is achied for a row-major sparse matrix format. + * Moreover, in this case multi-threading can be exploited if the user code is compiled with OpenMP enabled. + * See \ref TopicMultiThreading for details. + * * This class can be used as the direct solver classes. Here is a typical usage example: * \include BiCGSTAB_simple.cpp * diff --git a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index 9e7dd1404..8f33c446d 100644 --- a/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -114,20 +114,28 @@ struct traits > * * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix. * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower, - * Upper, or Lower|Upper in which the full matrix entries will be considered. Default is Lower. + * \c Upper, or \c Lower|Upper in which the full matrix entries will be considered. + * Default is \c Lower, best performance is \c Lower|Upper. * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner * * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations * and NumTraits::epsilon() for the tolerance. * + * The tolerance corresponds to the relative residual error: |Ax-b|/|b| + * + * \b Performance: Even though the default value of \c _UpLo is \c Lower, significantly higher performance is + * achieved when using a complete matrix and \b Lower|Upper as the \a _UpLo template parameter. Moreover, in this + * case multi-threading can be exploited if the user code is compiled with OpenMP enabled. + * See \ref TopicMultiThreading for details. 
+ * * This class can be used as the direct solver classes. Here is a typical usage example: \code int n = 10000; VectorXd x(n), b(n); SparseMatrix A(n,n); // fill A and b - ConjugateGradient > cg; + ConjugateGradient, Lower|Upper> cg; cg.compute(A); x = cg.solve(b); std::cout << "#iterations: " << cg.iterations() << std::endl; @@ -183,10 +191,13 @@ public: template void _solve_with_guess_impl(const Rhs& b, Dest& x) const { + typedef Ref MatRef; + typedef typename internal::conditional::IsComplex), + Transpose, MatRef const&>::type RowMajorWrapper; typedef typename internal::conditional&, - typename Ref::template ConstSelfAdjointViewReturnType::Type - >::type MatrixWrapperType; + RowMajorWrapper, + typename MatRef::template ConstSelfAdjointViewReturnType::Type + >::type SelfAdjointWrapper; m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; @@ -196,7 +207,8 @@ public: m_error = Base::m_tolerance; typename Dest::ColXpr xj(x,j); - internal::conjugate_gradient(MatrixWrapperType(mp_matrix), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error); + RowMajorWrapper row_mat(mp_matrix); + internal::conjugate_gradient(SelfAdjointWrapper(row_mat), b.col(j), xj, Base::m_preconditioner, m_iterations, m_error); } m_isInitialized = true; diff --git a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h index 6477b9de2..22c602a91 100644 --- a/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +++ b/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h @@ -126,10 +126,16 @@ public: /** \internal */ Index cols() const { return mp_matrix.cols(); } - /** \returns the tolerance threshold used by the stopping criteria */ + /** \returns the tolerance threshold used by the stopping criteria. + * \sa setTolerance() + */ RealScalar tolerance() const { return m_tolerance; } - /** Sets the tolerance threshold used by the stopping criteria */ + /** Sets the tolerance threshold used by the stopping criteria. + * + * This value is used as an upper bound to the relative residual error: |Ax-b|/|b|. + * The default value is the machine precision given by NumTraits::epsilon() + */ Derived& setTolerance(const RealScalar& tolerance) { m_tolerance = tolerance; @@ -167,7 +173,9 @@ public: return m_iterations; } - /** \returns the tolerance error reached during the last solve */ + /** \returns the tolerance error reached during the last solve. + * It is a close approximation of the true relative residual error |Ax-b|/|b|. 
+ */ RealScalar error() const { eigen_assert(m_isInitialized && "ConjugateGradient is not initialized."); diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h old mode 100644 new mode 100755 index 7ab2e3e6b..234e3213b --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -54,7 +54,7 @@ namespace internal template<> struct pardiso_run_selector { - typedef long long int IndexTypeType; + typedef long long int IndexType; static IndexType run( _MKL_DSS_HANDLE_t pt, IndexType maxfct, IndexType mnum, IndexType type, IndexType phase, IndexType n, void *a, IndexType *ia, IndexType *ja, IndexType *perm, IndexType nrhs, IndexType *iparm, IndexType msglvl, void *b, void *x) { @@ -93,19 +93,19 @@ namespace internal typedef typename _MatrixType::StorageIndex StorageIndex; }; -} +} // end namespace internal template -class PardisoImpl : public SparseSolveBase +class PardisoImpl : public SparseSolverBase { protected: - typedef SparseSolveBase Base; + typedef SparseSolverBase Base; using Base::derived; using Base::m_isInitialized; typedef internal::pardiso_traits Traits; public: - using base::_solve_impl; + using Base::_solve_impl; typedef typename Traits::MatrixType MatrixType; typedef typename Traits::Scalar Scalar; @@ -173,16 +173,17 @@ class PardisoImpl : public SparseSolveBase Derived& compute(const MatrixType& matrix); - template - bool _solve_impl(const MatrixBase &b, MatrixBase& x) const; + template + void _solve_impl(const MatrixBase &b, MatrixBase &dest) const; protected: void pardisoRelease() { if(m_isInitialized) // Factorization ran at least once { - internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, m_size, 0, 0, 0, m_perm.data(), 0, - m_iparm.data(), m_msglvl, 0, 0); + internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0, + m_iparm.data(), m_msglvl, NULL, NULL); + m_isInitialized = false; } } @@ -217,12 +218,14 @@ class PardisoImpl : public SparseSolveBase m_iparm[27] = (sizeof(RealScalar) == 4) ? 1 : 0; m_iparm[34] = 1; // C indexing m_iparm[59] = 1; // Automatic switch between In-Core and Out-of-Core modes + + memset(m_pt, 0, sizeof(m_pt)); } protected: // cached data to reduce reallocation, etc. 
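A minimal solver round-trip consistent with the reworked PardisoImpl interface above (SparseSolverBase base class, void _solve_impl reporting failures through info()); it assumes an MKL/PARDISO-enabled build, and the matrix and right-hand side are illustrative only:

#include <Eigen/Sparse>
#include <Eigen/PardisoSupport>   // requires linking against MKL PARDISO

int main()
{
  typedef Eigen::SparseMatrix<double> SpMat;
  SpMat A(3,3);
  A.insert(0,0) = 4; A.insert(1,1) = 5; A.insert(2,2) = 6;
  A.makeCompressed();   // getMatrix() now also compresses its internal copy
  Eigen::VectorXd b = Eigen::VectorXd::Ones(3);

  Eigen::PardisoLU<SpMat> solver;
  solver.compute(A);
  if(solver.info() != Eigen::Success) return 1;     // factorization failed
  Eigen::VectorXd x = solver.solve(b);
  return solver.info() == Eigen::Success ? 0 : 1;   // solve errors are now reported via info(),
                                                    // not via a bool return from _solve_impl
}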
- void manageErrorCode(Index error) + void manageErrorCode(Index error) const { switch(error) { @@ -239,7 +242,7 @@ class PardisoImpl : public SparseSolveBase } mutable SparseMatrixType m_matrix; - ComputationInfo m_info; + mutable ComputationInfo m_info; bool m_analysisIsOk, m_factorizationIsOk; Index m_type, m_msglvl; mutable void *m_pt[64]; @@ -256,7 +259,6 @@ Derived& PardisoImpl::compute(const MatrixType& a) eigen_assert(a.rows() == a.cols()); pardisoRelease(); - memset(m_pt, 0, sizeof(m_pt)); m_perm.setZero(m_size); derived().getMatrix(a); @@ -279,7 +281,6 @@ Derived& PardisoImpl::analyzePattern(const MatrixType& a) eigen_assert(m_size == a.cols()); pardisoRelease(); - memset(m_pt, 0, sizeof(m_pt)); m_perm.setZero(m_size); derived().getMatrix(a); @@ -313,12 +314,15 @@ Derived& PardisoImpl::factorize(const MatrixType& a) return derived(); } -template +template template -bool PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase& x) const +void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase& x) const { if(m_iparm[0] == 0) // Factorization was not computed - return false; + { + m_info = InvalidInput; + return; + } //Index n = m_matrix.rows(); Index nrhs = Index(b.cols()); @@ -353,7 +357,7 @@ bool PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase class PardisoLU : public PardisoImpl< PardisoLU > { protected: - typedef PardisoImpl< PardisoLU > Base; + typedef PardisoImpl Base; typedef typename Base::Scalar Scalar; typedef typename Base::RealScalar RealScalar; using Base::pardisoInit; @@ -401,6 +405,7 @@ class PardisoLU : public PardisoImpl< PardisoLU > void getMatrix(const MatrixType& matrix) { m_matrix = matrix; + m_matrix.makeCompressed(); } }; @@ -424,7 +429,6 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > protected: typedef PardisoImpl< PardisoLLT > Base; typedef typename Base::Scalar Scalar; - typedef typename Base::StorageIndex StorageIndex; typedef typename Base::RealScalar RealScalar; using Base::pardisoInit; using Base::m_matrix; @@ -432,9 +436,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > public: + typedef typename Base::StorageIndex StorageIndex; enum { UpLo = _UpLo }; using Base::compute; - using Base::solve; PardisoLLT() : Base() @@ -457,6 +461,7 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > PermutationMatrix p_null; m_matrix.resize(matrix.rows(), matrix.cols()); m_matrix.template selfadjointView() = matrix.template selfadjointView().twistedBy(p_null); + m_matrix.makeCompressed(); } }; @@ -482,7 +487,6 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT > protected: typedef PardisoImpl< PardisoLDLT > Base; typedef typename Base::Scalar Scalar; - typedef typename Base::StorageIndex StorageIndex; typedef typename Base::RealScalar RealScalar; using Base::pardisoInit; using Base::m_matrix; @@ -490,8 +494,8 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT > public: + typedef typename Base::StorageIndex StorageIndex; using Base::compute; - using Base::solve; enum { UpLo = Options&(Upper|Lower) }; PardisoLDLT() @@ -513,6 +517,7 @@ class PardisoLDLT : public PardisoImpl< PardisoLDLT > PermutationMatrix p_null; m_matrix.resize(matrix.rows(), matrix.cols()); m_matrix.template selfadjointView() = matrix.template selfadjointView().twistedBy(p_null); + m_matrix.makeCompressed(); } }; diff --git a/Eigen/src/SparseCore/CompressedStorage.h b/Eigen/src/SparseCore/CompressedStorage.h index d667944ce..2199848e9 100644 --- a/Eigen/src/SparseCore/CompressedStorage.h +++ b/Eigen/src/SparseCore/CompressedStorage.h @@ -50,9 +50,9 @@ class 
CompressedStorage CompressedStorage& operator=(const CompressedStorage& other) { + resize(other.size()); if(other.size()>0) { - resize(other.size()); internal::smart_copy(other.m_values, other.m_values + m_size, m_values); internal::smart_copy(other.m_indices, other.m_indices + m_size, m_indices); } diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 469c2b188..1ce60f385 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -16,8 +16,7 @@ template template Derived& SparseMatrixBase::operator=(const EigenBase &other) { - // TODO use the evaluator mechanism - other.derived().evalTo(derived()); + internal::call_assignment_no_alias(derived(), other.derived()); return derived(); } @@ -182,6 +181,39 @@ struct Assignment, internal::assign_op struct AssignmentKind { typedef Diagonal2Sparse Kind; }; + +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + typedef typename DstXprType::StorageIndex StorageIndex; + typedef Array ArrayXI; + typedef Array ArrayXS; + template + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + Index size = src.diagonal().size(); + dst.makeCompressed(); + dst.resizeNonZeros(size); + Map(dst.innerIndexPtr(), size).setLinSpaced(0,StorageIndex(size)-1); + Map(dst.outerIndexPtr(), size+1).setLinSpaced(0,StorageIndex(size)); + Map(dst.valuePtr(), size) = src.diagonal(); + } + + template + static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + dst.diagonal() = src.diagonal(); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { dst.diagonal() += src.diagonal(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { dst.diagonal() -= src.diagonal(); } +}; } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 096af7fb0..78d16892d 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -390,6 +390,22 @@ SparseMatrixBase::operator+=(const SparseMatrixBase& othe return derived() = derived() + other.derived(); } +template +template +Derived& SparseMatrixBase::operator+=(const DiagonalBase& other) +{ + call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +template +template +Derived& SparseMatrixBase::operator-=(const DiagonalBase& other) +{ + call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + template template EIGEN_STRONG_INLINE const EIGEN_SPARSE_CWISE_PRODUCT_RETURN_TYPE diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 731d40f29..b8d71d3f8 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -30,23 +30,48 @@ struct sparse_time_dense_product_impl::type Rhs; typedef typename internal::remove_all::type Res; typedef typename evaluator::InnerIterator LhsInnerIterator; + typedef typename evaluator::type LhsEval; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { - typename evaluator::type lhsEval(lhs); + LhsEval lhsEval(lhs); + + Index n = lhs.outerSize(); +#ifdef EIGEN_HAS_OPENMP + Eigen::initParallel(); + 
Index threads = Eigen::nbThreads(); +#endif + for(Index c=0; c1 && lhs.nonZeros() > 20000) { - typename Res::Scalar tmp(0); - for(LhsInnerIterator it(lhsEval,j); it ;++it) - tmp += it.value() * rhs.coeff(it.index(),c); - res.coeffRef(j,c) += alpha * tmp; + #pragma omp parallel for schedule(static) num_threads(threads) + for(Index i=0; i struct scalar_product_traits > { diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index fb05fb4b6..f18829866 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -97,8 +97,8 @@ class SparseMatrix using Base::isCompressed; using Base::nonZeros; _EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix) - EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=) - EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=) + using Base::operator+=; + using Base::operator-=; typedef MappedSparseMatrix Map; typedef Diagonal DiagonalReturnType; @@ -695,6 +695,15 @@ class SparseMatrix initAssignment(other); other.evalTo(*this); } + + /** \brief Copy constructor with in-place evaluation */ + template + explicit SparseMatrix(const DiagonalBase& other) + : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) + { + check_template_parameters(); + *this = other.derived(); + } /** Swaps the content of two sparse matrices of the same type. * This is a fast operation that simply swaps the underlying pointers and parameters. */ diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index f1b5d2a97..4e720904e 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -242,6 +242,11 @@ template class SparseMatrixBase : public EigenBase Derived& operator+=(const SparseMatrixBase& other); template Derived& operator-=(const SparseMatrixBase& other); + + template + Derived& operator+=(const DiagonalBase& other); + template + Derived& operator-=(const DiagonalBase& other); Derived& operator*=(const Scalar& other); Derived& operator/=(const Scalar& other); @@ -367,6 +372,8 @@ template class SparseMatrixBase : public EigenBase static inline StorageIndex convert_index(const Index idx) { return internal::convert_index(idx); } + private: + template void evalTo(Dest &) const; }; } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 3da856799..dec96efb5 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -45,8 +45,13 @@ template class SparseSelfAdjointView { public: - enum { Mode = _Mode }; + enum { + Mode = _Mode, + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime + }; + typedef EigenBase Base; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; typedef Matrix VectorI; @@ -116,20 +121,6 @@ template class SparseSelfAdjointView template SparseSelfAdjointView& rankUpdate(const SparseMatrixBase& u, const Scalar& alpha = Scalar(1)); - /** \internal triggered by sparse_matrix = SparseSelfadjointView; */ - template void evalTo(SparseMatrix& _dest) const - { - internal::permute_symm_to_fullsymm(m_matrix, _dest); - } - - template void evalTo(DynamicSparseMatrix& _dest) const - { - // TODO directly evaluate into _dest; - SparseMatrix tmp(_dest.rows(),_dest.cols()); - internal::permute_symm_to_fullsymm(m_matrix, tmp); - _dest = tmp; - } - /** \returns an expression of P H P^-1 
*/ // TODO implement twists in a more evaluator friendly fashion SparseSymmetricPermutationProduct<_MatrixTypeNested,Mode> twistedBy(const PermutationMatrix& perm) const @@ -140,7 +131,7 @@ template class SparseSelfAdjointView template SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct& permutedMatrix) { - permutedMatrix.evalTo(*this); + internal::call_assignment_no_alias_no_transpose(*this, permutedMatrix); return *this; } @@ -157,11 +148,21 @@ template class SparseSelfAdjointView return *this = src.twistedBy(pnull); } + void resize(Index rows, Index cols) + { + EIGEN_ONLY_USED_FOR_DEBUG(rows); + EIGEN_ONLY_USED_FOR_DEBUG(cols); + eigen_assert(rows == this->rows() && cols == this->cols() + && "SparseSelfadjointView::resize() does not actually allow to resize."); + } + protected: typename MatrixType::Nested m_matrix; //mutable VectorI m_countPerRow; //mutable VectorI m_countPerCol; + private: + template void evalTo(Dest &) const; }; /*************************************************************************** @@ -200,6 +201,47 @@ SparseSelfAdjointView::rankUpdate(const SparseMatrixBase +// in the future selfadjoint-ness should be defined by the expression traits +// such that Transpose > is valid. (currently TriangularBase::transpose() is overloaded to make it work) +template +struct evaluator_traits > +{ + typedef typename storage_kind_to_evaluator_kind::Kind Kind; + typedef SparseSelfAdjointShape Shape; + + static const int AssumeAliasing = 0; +}; + +struct SparseSelfAdjoint2Sparse {}; + +template<> struct AssignmentKind { typedef SparseSelfAdjoint2Sparse Kind; }; +template<> struct AssignmentKind { typedef Sparse2Sparse Kind; }; + +template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> +struct Assignment +{ + typedef typename DstXprType::StorageIndex StorageIndex; + template + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + internal::permute_symm_to_fullsymm(src.matrix(), dst); + } + + template + static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + // TODO directly evaluate into dst; + SparseMatrix tmp(dst.rows(),dst.cols()); + internal::permute_symm_to_fullsymm(src.matrix(), tmp); + dst = tmp; + } +}; + +} // end namespace internal + /*************************************************************************** * Implementation of sparse self-adjoint time dense matrix ***************************************************************************/ @@ -252,18 +294,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons res.row(j) += i.value() * rhs.row(j); } } - -// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.> -// in the future selfadjoint-ness should be defined by the expression traits -// such that Transpose > is valid. 
(currently TriangularBase::transpose() is overloaded to make it work) -template -struct evaluator_traits > -{ - typedef typename storage_kind_to_evaluator_kind::Kind Kind; - typedef SparseSelfAdjointShape Shape; - - static const int AssumeAliasing = 0; -}; + template struct generic_product_impl @@ -519,12 +550,16 @@ class SparseSymmetricPermutationProduct public: typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; + enum { + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime + }; protected: typedef PermutationMatrix Perm; public: typedef Matrix VectorI; typedef typename MatrixType::Nested MatrixTypeNested; - typedef typename internal::remove_all::type _MatrixTypeNested; + typedef typename internal::remove_all::type NestedExpression; SparseSymmetricPermutationProduct(const MatrixType& mat, const Perm& perm) : m_matrix(mat), m_perm(perm) @@ -532,20 +567,9 @@ class SparseSymmetricPermutationProduct inline Index rows() const { return m_matrix.rows(); } inline Index cols() const { return m_matrix.cols(); } - - template - void evalTo(SparseMatrix& _dest) const - { -// internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); - SparseMatrix tmp; - internal::permute_symm_to_fullsymm(m_matrix,tmp,m_perm.indices().data()); - _dest = tmp; - } - - template void evalTo(SparseSelfAdjointView& dest) const - { - internal::permute_symm_to_symm(m_matrix,dest.matrix(),m_perm.indices().data()); - } + + const NestedExpression& matrix() const { return m_matrix; } + const Perm& perm() const { return m_perm; } protected: MatrixTypeNested m_matrix; @@ -553,6 +577,31 @@ class SparseSymmetricPermutationProduct }; +namespace internal { + +template +struct Assignment, internal::assign_op, Sparse2Sparse> +{ + typedef SparseSymmetricPermutationProduct SrcXprType; + typedef typename DstXprType::StorageIndex DstIndex; + template + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) + { + // internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); + SparseMatrix tmp; + internal::permute_symm_to_fullsymm(src.matrix(),tmp,src.perm().indices().data()); + dst = tmp; + } + + template + static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) + { + internal::permute_symm_to_symm(src.matrix(),dst.matrix(),src.perm().indices().data()); + } +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_SPARSE_SELFADJOINTVIEW_H diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h index 215601aa6..d53a9cb17 100644 --- a/Eigen/src/SparseCore/SparseUtil.h +++ b/Eigen/src/SparseCore/SparseUtil.h @@ -37,11 +37,7 @@ EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \ } #define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ -EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) \ -EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, +=) \ -EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, -=) \ -EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, *=) \ -EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=) +EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) // TODO this is mostly the same as EIGEN_GENERIC_PUBLIC_INTERFACE #define _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 1cf8bebc7..4dc42e87b 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ 
b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -152,8 +152,8 @@ Index SparseLUImpl::memInit(Index m, Index n, Index annz, I { Index& num_expansions = glu.num_expansions; //No memory expansions so far num_expansions = 0; - glu.nzumax = glu.nzlumax = (std::min)(fillratio * annz / n, m) * n; // estimated number of nonzeros in U - glu.nzlmax = (std::max)(Index(4), fillratio) * annz / 4; // estimated nnz in L factor + glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U + glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated nnz in L factor // Return the estimated size to the user if necessary Index tempSpace; tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar); diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index ce4a70454..548b3f9b0 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -23,6 +23,10 @@ namespace internal { typedef typename SparseQRType::MatrixType ReturnType; typedef typename ReturnType::StorageIndex StorageIndex; typedef typename ReturnType::StorageKind StorageKind; + enum { + RowsAtCompileTime = Dynamic, + ColsAtCompileTime = Dynamic + }; }; template struct traits > { @@ -235,8 +239,9 @@ class SparseQR : public SparseSolverBase > return m_info; } - protected: - inline void sort_matrix_Q() + + /** \internal */ + inline void _sort_matrix_Q() { if(this->m_isQSorted) return; // The matrix Q is sorted during the transposition @@ -267,7 +272,6 @@ class SparseQR : public SparseSolverBase > bool m_isEtreeOk; // whether the elimination tree match the initial input matrix template friend struct SparseQR_QProduct; - template friend struct SparseQRMatrixQReturnType; }; @@ -635,6 +639,10 @@ struct SparseQRMatrixQReturnType : public EigenBase DenseMatrix; + enum { + RowsAtCompileTime = Dynamic, + ColsAtCompileTime = Dynamic + }; explicit SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {} template SparseQR_QProduct operator*(const MatrixBase& other) @@ -652,19 +660,6 @@ struct SparseQRMatrixQReturnType : public EigenBase(m_qr); } - template void evalTo(MatrixBase& dest) const - { - dest.derived() = m_qr.matrixQ() * Dest::Identity(m_qr.rows(), m_qr.rows()); - } - template void evalTo(SparseMatrixBase& dest) const - { - Dest idMat(m_qr.rows(), m_qr.rows()); - idMat.setIdentity(); - // Sort the sparse householder reflectors if needed - const_cast(&m_qr)->sort_matrix_Q(); - dest.derived() = SparseQR_QProduct(m_qr, idMat, false); - } - const SparseQRType& m_qr; }; @@ -680,6 +675,47 @@ struct SparseQRMatrixQTransposeReturnType const SparseQRType& m_qr; }; +namespace internal { + +template +struct evaluator_traits > +{ + typedef typename SparseQRType::MatrixType MatrixType; + typedef typename storage_kind_to_evaluator_kind::Kind Kind; + typedef SparseShape Shape; + static const int AssumeAliasing = 0; +}; + +template< typename DstXprType, typename SparseQRType> +struct Assignment, internal::assign_op, Sparse2Sparse> +{ + typedef SparseQRMatrixQReturnType SrcXprType; + typedef typename DstXprType::Scalar Scalar; + typedef typename DstXprType::StorageIndex StorageIndex; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); + idMat.setIdentity(); + // Sort the sparse householder reflectors if needed + const_cast(&src.m_qr)->_sort_matrix_Q(); + dst = SparseQR_QProduct(src.m_qr, idMat, false); 
+ } +}; + +template< typename DstXprType, typename SparseQRType> +struct Assignment, internal::assign_op, Sparse2Dense> +{ + typedef SparseQRMatrixQReturnType SrcXprType; + typedef typename DstXprType::Scalar Scalar; + typedef typename DstXprType::StorageIndex StorageIndex; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows()); + } +}; + +} // end namespace internal + } // end namespace Eigen #endif diff --git a/cmake/EigenDetermineVSServicePack.cmake b/cmake/EigenDetermineVSServicePack.cmake index a1a7348f6..fed78194d 100644 --- a/cmake/EigenDetermineVSServicePack.cmake +++ b/cmake/EigenDetermineVSServicePack.cmake @@ -26,6 +26,8 @@ function(EigenDetermineVSServicePack _pack) set(_sp "vc110sp2") elseif(${_cl_version} VERSION_EQUAL "17.00.60610.1") set(_sp "vc110sp3") + else() + set(_sp ${CMAKE_CXX_COMPILER_VERSION}) endif() endif() diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index f8aec777d..0774b5d81 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -367,7 +367,7 @@ macro(ei_get_compilerver VAR) # on all other system we rely on ${CMAKE_CXX_COMPILER} # supporting a "--version" or "/version" flag - if(WIN32 AND NOT CYGWIN) + if(WIN32 AND NOT CYGWIN AND NOT MINGW) set(EIGEN_CXX_FLAG_VERSION "/version") else() set(EIGEN_CXX_FLAG_VERSION "--version") diff --git a/doc/QuickReference.dox b/doc/QuickReference.dox index a4be0f68a..c90c9e9ce 100644 --- a/doc/QuickReference.dox +++ b/doc/QuickReference.dox @@ -13,17 +13,17 @@ The Eigen library is divided in a Core module and several additional modules. Ea - + - - - - - - - - - + + + + + + + + +
Module | Header file | Contents
\link Core_Module Core \endlink | \code#include <Eigen/Core>\endcode | Matrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation
\link Core_Module Core \endlink | \code#include <Eigen/Core>\endcode | Matrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation
\link Geometry_Module Geometry \endlink | \code#include <Eigen/Geometry>\endcode | Transform, Translation, Scaling, Rotation2D and 3D rotations (Quaternion, AngleAxis)
\link LU_Module LU \endlink | \code#include <Eigen/LU>\endcode | Inverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)
\link Cholesky_Module Cholesky \endlink | \code#include <Eigen/Cholesky>\endcode | LLT and LDLT Cholesky factorization with solver
\link Householder_Module Householder \endlink | \code#include <Eigen/Householder>\endcode | Householder transformations; this module is used by several linear algebra modules
\link SVD_Module SVD \endlink | \code#include <Eigen/SVD>\endcode | SVD decomposition with least-squares solver (JacobiSVD)
\link QR_Module QR \endlink | \code#include <Eigen/QR>\endcode | QR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)
\link Eigenvalues_Module Eigenvalues \endlink | \code#include <Eigen/Eigenvalues>\endcode | Eigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)
\link Sparse_modules Sparse \endlink | \code#include <Eigen/Sparse>\endcode | %Sparse matrix storage and related basic linear algebra (SparseMatrix, DynamicSparseMatrix, SparseVector)
\code#include <Eigen/Dense>\endcode | Includes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files
\code#include <Eigen/Eigen>\endcode | Includes %Dense and %Sparse header files (the whole Eigen library)
\link LU_Module LU \endlink | \code#include <Eigen/LU>\endcode | Inverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)
\link Cholesky_Module Cholesky \endlink | \code#include <Eigen/Cholesky>\endcode | LLT and LDLT Cholesky factorization with solver
\link Householder_Module Householder \endlink | \code#include <Eigen/Householder>\endcode | Householder transformations; this module is used by several linear algebra modules
\link SVD_Module SVD \endlink | \code#include <Eigen/SVD>\endcode | SVD decompositions with least-squares solver (JacobiSVD, BDCSVD)
\link QR_Module QR \endlink | \code#include <Eigen/QR>\endcode | QR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)
\link Eigenvalues_Module Eigenvalues \endlink | \code#include <Eigen/Eigenvalues>\endcode | Eigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)
\link Sparse_modules Sparse \endlink | \code#include <Eigen/Sparse>\endcode | %Sparse matrix storage and related basic linear algebra (SparseMatrix, SparseVector) \n (see \ref SparseQuickRefPage for details on sparse modules)
\code#include <Eigen/Dense>\endcode | Includes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files
\code#include <Eigen/Eigen>\endcode | Includes %Dense and %Sparse header files (the whole Eigen library)
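As a minimal illustration of the table above (a sketch using only the headers listed there, not tied to the changes in this patch), the following program pulls in just the Core and Cholesky modules and solves a small symmetric positive definite system:
\code
#include <Eigen/Core>
#include <Eigen/Cholesky>   // brings in LLT/LDLT on top of the Core module
#include <iostream>

int main()
{
  Eigen::Matrix3d A;
  A << 4, 1, 0,
       1, 3, 1,
       0, 1, 2;                            // small symmetric positive definite matrix
  Eigen::Vector3d b(1, 2, 3);
  Eigen::Vector3d x = A.ldlt().solve(b);   // LDLT is provided by the Cholesky module
  std::cout << x.transpose() << std::endl;
}
\endcode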
top @@ -364,32 +364,10 @@ vec3 = vec1.cross(vec2);\endcode top \section QuickRef_Coeffwise Coefficient-wise \& Array operators -Coefficient-wise operators for matrices and vectors: - - - -
Matrix API \matrixworld | Via Array conversions
\code -mat1.cwiseMin(mat2) -mat1.cwiseMax(mat2) -mat1.cwiseAbs2() -mat1.cwiseAbs() -mat1.cwiseSqrt() -mat1.cwiseProduct(mat2) -mat1.cwiseQuotient(mat2)\endcode -\code -mat1.array().min(mat2.array()) -mat1.array().max(mat2.array()) -mat1.array().abs2() -mat1.array().abs() -mat1.array().sqrt() -mat1.array() * mat2.array() -mat1.array() / mat2.array() -\endcode
-It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with std::ptr_fun: -\code mat1.unaryExpr(std::ptr_fun(foo))\endcode - -Array operators:\arrayworld +In addition to the aforementioned operators, Eigen supports numerous coefficient-wise operators and functions. +Most of them unambiguously make sense in array-world\arrayworld. The following operators are readily available for arrays, +or available through .array() for vectors and matrices: -
Arithmetic operators\code @@ -400,28 +378,107 @@ array1 + scalar array1 - scalar array1 += scalar array1 -= scalar array1 < array2 array1 > array2 array1 < scalar array1 > scalar array1 <= array2 array1 >= array2 array1 <= scalar array1 >= scalar array1 == array2 array1 != array2 array1 == scalar array1 != scalar +array1.min(array2) array1.max(array2) array1.min(scalar) array1.max(scalar) \endcode
Trigo, power, and \n misc functions \n and the STL variants\code -array1.min(array2) -array1.max(array2) +
Trigo, power, and \n misc functions \n and the STL-like variants\code array1.abs2() array1.abs() abs(array1) array1.sqrt() sqrt(array1) array1.log() log(array1) +array1.log10() log10(array1) array1.exp() exp(array1) -array1.pow(exponent) pow(array1,exponent) +array1.pow(array2) pow(array1,array2) +array1.pow(scalar) pow(array1,scalar) array1.square() array1.cube() array1.inverse() + array1.sin() sin(array1) array1.cos() cos(array1) array1.tan() tan(array1) array1.asin() asin(array1) array1.acos() acos(array1) +array1.atan() atan(array1) +array1.sinh() sinh(array1) +array1.cosh() cosh(array1) +array1.tanh() tanh(array1) +array1.arg() arg(array1) + +array1.floor() floor(array1) +array1.ceil() ceil(array1) +array1.round() round(array1) + +array1.isFinite() isfinite(array1) +array1.isInf() isinf(array1) +array1.isNaN() isnan(array1) \endcode
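To make the list above concrete, here is a small self-contained sketch (plain Eigen API, independent of the changes in this patch) exercising a few of these coefficient-wise functions on a one-dimensional array:
\code
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::ArrayXd array1 = Eigen::ArrayXd::LinSpaced(5, 1.0, 5.0);   // 1 2 3 4 5
  Eigen::ArrayXd array2 = array1.sqrt() + array1.square();          // coefficient-wise sqrt and square
  std::cout << array1.exp().transpose() << "\n"                     // coefficient-wise exponential
            << array2.min(10.0).transpose() << std::endl;           // coefficient-wise min against a scalar
}
\endcode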
+ +The following coefficient-wise operators are available for all kinds of expressions (matrices, vectors, and arrays), and for both real and complex scalar types: + + + +
Eigen's API | STL-like APIs\arrayworld | Comments
\code +mat1.real() +mat1.imag() +mat1.conjugate() +\endcode +\code +real(array1) +imag(array1) +conj(array1) +\endcode + +\code + // read-write, no-op for real expressions + // read-only for real, read-write for complexes + // no-op for real expressions +\endcode +
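For instance, on a complex matrix these accessors can be used as in the following sketch (standard Eigen API, shown here only for illustration):
\code
#include <Eigen/Core>
#include <iostream>
#include <complex>

int main()
{
  Eigen::Matrix2cd m;
  m << std::complex<double>(1, 2), std::complex<double>(0, -1),
       std::complex<double>(3, 0), std::complex<double>(2, 5);
  std::cout << m.real()      << "\n\n"   // real parts (read-write for complex expressions)
            << m.imag()      << "\n\n"   // imaginary parts
            << m.conjugate() << std::endl;
}
\endcode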
+ +Some coefficient-wise operators are readily available for matrices and vectors through the following cwise* methods: + + + +
Matrix API \matrixworld | Via Array conversions
\code +mat1.cwiseMin(mat2) mat1.cwiseMin(scalar) +mat1.cwiseMax(mat2) mat1.cwiseMax(scalar) +mat1.cwiseAbs2() +mat1.cwiseAbs() +mat1.cwiseSqrt() +mat1.cwiseInverse() +mat1.cwiseProduct(mat2) +mat1.cwiseQuotient(mat2) +mat1.cwiseEqual(mat2) mat1.cwiseEqual(scalar) +mat1.cwiseNotEqual(mat2) +\endcode +\code +mat1.array().min(mat2.array()) mat1.array().min(scalar) +mat1.array().max(mat2.array()) mat1.array().max(scalar) +mat1.array().abs2() +mat1.array().abs() +mat1.array().sqrt() +mat1.array().inverse() +mat1.array() * mat2.array() +mat1.array() / mat2.array() +mat1.array() == mat2.array() mat1.array() == scalar +mat1.array() != mat2.array() +\endcode
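The equivalence spelled out in the next paragraph can be checked with a short sketch (ordinary Eigen API, not tied to this patch): both forms below produce the same coefficients, the cwise* call staying in the matrix world and the .array() version going through the array world:
\code
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::Matrix2d mat1, mat2;
  mat1 << 1, 2, 3, 4;
  mat2 << 5, 6, 7, 8;

  Eigen::Matrix2d p1 = mat1.cwiseProduct(mat2);                // matrix-world expression
  Eigen::Matrix2d p2 = (mat1.array() * mat2.array()).matrix(); // array-world expression, converted back
  std::cout << (p1 - p2).norm() << std::endl;                  // prints 0: identical coefficients
}
\endcode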
+The main difference between the two APIs is that the one based on cwise* methods returns an expression in the matrix world, +while the second one (based on .array()) returns an array expression. +Recall that .array() has no cost; it only changes the available API and interpretation of the data. + +It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with std::ptr_fun (c++03), std::ref (c++11), or lambdas (c++11): +\code +mat1.unaryExpr(std::ptr_fun(foo)); +mat1.unaryExpr(std::ref(foo)); +mat1.unaryExpr([](double x) { return foo(x); }); +\endcode + + top \section QuickRef_Reductions Reductions diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox index 13741280a..48c18f46f 100644 --- a/doc/SparseLinearSystems.dox +++ b/doc/SparseLinearSystems.dox @@ -21,7 +21,7 @@ They are summarized in the following table: ConjugateGradient\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkClassic iterative CGSPDPreconditionning built-in, MPL2 Recommended for large symmetric problems (e.g., 3D Poisson eq.) -LSCG\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkCG for rectangular least-square problemRectangularPreconditionning +LeastSquaresConjugateGradient\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkCG for rectangular least-square problemRectangularPreconditioning built-in, MPL2 Solve for min |A'Ax-b|^2 without forming A'A BiCGSTAB\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkIterative stabilized bi-conjugate gradientSquarePreconditionning diff --git a/doc/TopicMultithreading.dox b/doc/TopicMultithreading.dox index e22e1c613..66028d7a8 100644 --- a/doc/TopicMultithreading.dox +++ b/doc/TopicMultithreading.dox @@ -22,8 +22,12 @@ n = Eigen::nbThreads( ); You can disable Eigen's multi threading at compile time by defining the EIGEN_DONT_PARALLELIZE preprocessor token. Currently, the following algorithms can make use of multi-threading: - * general matrix - matrix products - * PartialPivLU + - general dense matrix - matrix products + - PartialPivLU + - row-major-sparse * dense vector/matrix products + - ConjugateGradient with \c Lower|Upper as the \c UpLo template parameter. + - BiCGSTAB with a row-major sparse matrix format.
+ - LeastSquaresConjugateGradient \section TopicMultiThreading_UsingEigenWithMT Using Eigen in a multi-threaded application diff --git a/test/array.cpp b/test/array.cpp index b0210c682..8cd4fd888 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -124,8 +124,11 @@ template void comparisons(const ArrayType& m) c = internal::random(0, cols-1); ArrayType m1 = ArrayType::Random(rows, cols), - m2 = ArrayType::Random(rows, cols), - m3(rows, cols); + m2 = ArrayType::Random(rows, cols), + m3(rows, cols), + m4 = m1; + + m4 = (m4.abs()==Scalar(0)).select(1,m4); VERIFY(((m1 + Scalar(1)) > m1).all()); VERIFY(((m1 - Scalar(1)) < m1).all()); @@ -197,7 +200,10 @@ template void array_real(const ArrayType& m) ArrayType m1 = ArrayType::Random(rows, cols), m2 = ArrayType::Random(rows, cols), - m3(rows, cols); + m3(rows, cols), + m4 = m1; + + m4 = (m4.abs()==Scalar(0)).select(1,m4); Scalar s1 = internal::random(); @@ -215,9 +221,9 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.round(), round(m1)); VERIFY_IS_APPROX(m1.floor(), floor(m1)); VERIFY_IS_APPROX(m1.ceil(), ceil(m1)); - VERIFY((m1.isNaN() == isnan(m1)).all()); - VERIFY((m1.isInf() == isinf(m1)).all()); - VERIFY((m1.isFinite() == isfinite(m1)).all()); + VERIFY((m1.isNaN() == Eigen::isnan(m1)).all()); + VERIFY((m1.isInf() == Eigen::isinf(m1)).all()); + VERIFY((m1.isFinite() == Eigen::isfinite(m1)).all()); VERIFY_IS_APPROX(m1.inverse(), inverse(m1)); VERIFY_IS_APPROX(m1.abs(), abs(m1)); VERIFY_IS_APPROX(m1.abs2(), abs2(m1)); @@ -243,9 +249,9 @@ template void array_real(const ArrayType& m) VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); VERIFY_IS_APPROX(arg(m1), ((ArrayType)(m1<0))*std::acos(-1.0)); VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all()); - VERIFY(isnan(m1*0.0/0.0).all()); - VERIFY(isinf(m1/0.0).all()); - VERIFY((isfinite(m1) && !isfinite(m1*0.0/0.0) && !isfinite(m1/0.0)).all()); + VERIFY(Eigen::isnan((m1*0.0)/0.0).all()); + VERIFY(Eigen::isinf(m4/0.0).all()); + VERIFY((Eigen::isfinite(m1) && (!Eigen::isfinite(m1*0.0/0.0)) && (!Eigen::isfinite(m4/0.0))).all()); VERIFY_IS_APPROX(inverse(inverse(m1)),m1); VERIFY((abs(m1) == m1 || abs(m1) == -m1).all()); VERIFY_IS_APPROX(m3, sqrt(abs2(m1))); @@ -299,7 +305,11 @@ template void array_complex(const ArrayType& m) Index cols = m.cols(); ArrayType m1 = ArrayType::Random(rows, cols), - m2(rows, cols); + m2(rows, cols), + m4 = m1; + + m4.real() = (m4.real().abs()==RealScalar(0)).select(RealScalar(1),m4.real()); + m4.imag() = (m4.imag().abs()==RealScalar(0)).select(RealScalar(1),m4.imag()); Array m3(rows, cols); @@ -317,9 +327,9 @@ template void array_complex(const ArrayType& m) VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); VERIFY_IS_APPROX(m1.arg(), arg(m1)); - VERIFY((m1.isNaN() == isnan(m1)).all()); - VERIFY((m1.isInf() == isinf(m1)).all()); - VERIFY((m1.isFinite() == isfinite(m1)).all()); + VERIFY((m1.isNaN() == Eigen::isnan(m1)).all()); + VERIFY((m1.isInf() == Eigen::isinf(m1)).all()); + VERIFY((m1.isFinite() == Eigen::isfinite(m1)).all()); VERIFY_IS_APPROX(m1.inverse(), inverse(m1)); VERIFY_IS_APPROX(m1.log(), log(m1)); VERIFY_IS_APPROX(m1.log10(), log10(m1)); @@ -345,20 +355,20 @@ template void array_complex(const ArrayType& m) VERIFY_IS_APPROX(arg(m1), m3); std::complex zero(0.0,0.0); - VERIFY(isnan(m1*zero/zero).all()); + VERIFY(Eigen::isnan(m1*zero/zero).all()); #if EIGEN_COMP_CLANG // clang's complex division is notoriously broken - if(numext::isinf(m1(0,0)/Scalar(0))) { + 
if(numext::isinf(m4(0,0)/RealScalar(0))) { #endif - VERIFY(isinf(m1/zero).all()); + VERIFY(Eigen::isinf(m4/zero).all()); #if EIGEN_COMP_CLANG } else { - VERIFY(isinf(m1.real()/zero.real()).all()); + VERIFY(Eigen::isinf(m4.real()/zero.real()).all()); } #endif - VERIFY((isfinite(m1) && !isfinite(m1*zero/zero) && !isfinite(m1/zero)).all()); + VERIFY((Eigen::isfinite(m1) && (!Eigen::isfinite(m1*zero/zero)) && (!Eigen::isfinite(m1/zero))).all()); VERIFY_IS_APPROX(inverse(inverse(m1)),m1); VERIFY_IS_APPROX(conj(m1.conjugate()), m1); diff --git a/test/basicstuff.cpp b/test/basicstuff.cpp index 3d7d74d4e..c6bbf19ee 100644 --- a/test/basicstuff.cpp +++ b/test/basicstuff.cpp @@ -223,7 +223,7 @@ void fixedSizeMatrixConstruction() for(int k=0; k<2; ++k) VERIFY(m2(k) == DenseIndex(raw[k])); for(int k=0; k<2; ++k) VERIFY(a2(k) == DenseIndex(raw[k])); for(int k=0; k<2; ++k) VERIFY(m3(k) == int(raw[k])); - for(int k=0; k<2; ++k) VERIFY(m4(k) == float(raw[k])); + for(int k=0; k<2; ++k) VERIFY((m4(k)) == Scalar(float(raw[k]))); } { Matrix m(raw), m1(raw[0]), m2( (DenseIndex(raw[0])) ), m3( (int(raw[0])) ); diff --git a/test/bdcsvd.cpp b/test/bdcsvd.cpp index 52a02b697..f9f687aac 100644 --- a/test/bdcsvd.cpp +++ b/test/bdcsvd.cpp @@ -69,8 +69,8 @@ void test_bdcsvd() CALL_SUBTEST_7(( svd_verify_assert >(MatrixXf(10,12)) )); CALL_SUBTEST_8(( svd_verify_assert >(MatrixXcd(7,5)) )); - CALL_SUBTEST_1(( svd_all_trivial_2x2(bdcsvd) )); - CALL_SUBTEST_1(( svd_all_trivial_2x2(bdcsvd) )); + CALL_SUBTEST_101(( svd_all_trivial_2x2(bdcsvd) )); + CALL_SUBTEST_102(( svd_all_trivial_2x2(bdcsvd) )); for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_3(( bdcsvd() )); @@ -104,8 +104,8 @@ void test_bdcsvd() CALL_SUBTEST_7( BDCSVD(10,10) ); // Check that preallocation avoids subsequent mallocs - CALL_SUBTEST_9( svd_preallocate() ); + CALL_SUBTEST_9( svd_preallocate() ); - CALL_SUBTEST_2( svd_underoverflow() ); + CALL_SUBTEST_2( svd_underoverflow() ); } diff --git a/test/jacobisvd.cpp b/test/jacobisvd.cpp index f9de6b708..3d8d0203d 100644 --- a/test/jacobisvd.cpp +++ b/test/jacobisvd.cpp @@ -114,7 +114,7 @@ void test_jacobisvd() CALL_SUBTEST_7( JacobiSVD(10,10) ); // Check that preallocation avoids subsequent mallocs - CALL_SUBTEST_9( svd_preallocate() ); + CALL_SUBTEST_9( svd_preallocate() ); - CALL_SUBTEST_2( svd_underoverflow() ); + CALL_SUBTEST_2( svd_underoverflow() ); } diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index 8e3cc9a86..3c7cdbe41 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -88,6 +88,10 @@ template void real_complex(DenseIndex rows = MatrixType::Ro g_called = false; VERIFY_IS_APPROX(m1*s, m1*Scalar(s)); VERIFY(g_called && "matrix * real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1/s, m1/Scalar(s)); + VERIFY(g_called && "matrix / real not properly optimized"); } void test_linearstructure() diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 2f1accd1b..74b4d4279 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -315,9 +315,29 @@ template void packetmath_real() CHECK_CWISE1_IF(internal::packet_traits::HasExp, std::exp, internal::pexp); { data1[0] = std::numeric_limits::quiet_NaN(); + data1[1] = std::numeric_limits::epsilon(); packet_helper::HasExp,Packet> h; - h.store(data2, internal::pexp(h.load(data1))); + h.store(data2, internal::pexp(h.load(data1))); VERIFY(numext::isnan(data2[0])); + VERIFY_IS_EQUAL(std::exp(std::numeric_limits::epsilon()), data2[1]); + + data1[0] = -std::numeric_limits::epsilon(); + data1[1] = 0; + 
h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::epsilon()), data2[0]); + VERIFY_IS_EQUAL(std::exp(0), data2[1]); + + data1[0] = (std::numeric_limits::min)(); + data1[1] = -(std::numeric_limits::min)(); + h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp((std::numeric_limits::min)()), data2[0]); + VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits::min)()), data2[1]); + + data1[0] = std::numeric_limits::denorm_min(); + data1[1] = -std::numeric_limits::denorm_min(); + h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp(std::numeric_limits::denorm_min()), data2[0]); + VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::denorm_min()), data2[1]); } for (int i=0; i void packetmath_real() CHECK_CWISE1_IF(internal::packet_traits::HasLog, std::log, internal::plog); { data1[0] = std::numeric_limits::quiet_NaN(); + data1[1] = std::numeric_limits::epsilon(); packet_helper::HasLog,Packet> h; h.store(data2, internal::plog(h.load(data1))); - VERIFY(numext::isnan(data2[0])); + VERIFY(std::isnan(data2[0])); + // VERIFY_IS_EQUAL(std::log(std::numeric_limits::epsilon()), data2[1]); + + data1[0] = -std::numeric_limits::epsilon(); + data1[1] = 0; + h.store(data2, internal::plog(h.load(data1))); + VERIFY(std::isnan(data2[0])); + // VERIFY_IS_EQUAL(std::log(0), data2[1]); + + data1[0] = (std::numeric_limits::min)(); + data1[1] = -(std::numeric_limits::min)(); + h.store(data2, internal::plog(h.load(data1))); + VERIFY_IS_EQUAL(std::log((std::numeric_limits::min)()), data2[0]); + // VERIFY(std::isnan(data2[1])); + + data1[0] = std::numeric_limits::denorm_min(); + data1[1] = -std::numeric_limits::denorm_min(); + h.store(data2, internal::plog(h.load(data1))); + // VERIFY_IS_EQUAL(std::log(std::numeric_limits::denorm_min()), data2[0]); + // VERIFY(std::isnan(data2[1])); + data1[0] = -1.0f; h.store(data2, internal::plog(h.load(data1))); - VERIFY(numext::isnan(data2[0])); + VERIFY(std::isnan(data2[0])); #if !EIGEN_FAST_MATH h.store(data2, internal::psqrt(h.load(data1))); VERIFY(numext::isnan(data2[0])); diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp index eb3feac01..e7abd3725 100644 --- a/test/qr_colpivoting.cpp +++ b/test/qr_colpivoting.cpp @@ -23,8 +23,8 @@ template void qr() MatrixType m1; createRandomPIMatrixOfRank(rank,rows,cols,m1); ColPivHouseholderQR qr(m1); - VERIFY(rank == qr.rank()); - VERIFY(cols - qr.rank() == qr.dimensionOfKernel()); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel()); VERIFY(!qr.isInjective()); VERIFY(!qr.isInvertible()); VERIFY(!qr.isSurjective()); @@ -51,11 +51,11 @@ template void qr_fixedsize() Matrix m1; createRandomPIMatrixOfRank(rank,Rows,Cols,m1); ColPivHouseholderQR > qr(m1); - VERIFY(rank == qr.rank()); - VERIFY(Cols - qr.rank() == qr.dimensionOfKernel()); - VERIFY(qr.isInjective() == (rank == Rows)); - VERIFY(qr.isSurjective() == (rank == Cols)); - VERIFY(qr.isInvertible() == (qr.isInjective() && qr.isSurjective())); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(Cols - qr.rank(), qr.dimensionOfKernel()); + VERIFY_IS_EQUAL(qr.isInjective(), (rank == Rows)); + VERIFY_IS_EQUAL(qr.isSurjective(), (rank == Cols)); + VERIFY_IS_EQUAL(qr.isInvertible(), (qr.isInjective() && qr.isSurjective())); Matrix r = qr.matrixQR().template triangularView(); Matrix c = qr.householderQ() * r * qr.colsPermutation().inverse(); diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp index 601773404..d82e123d0 100644 --- 
a/test/qr_fullpivoting.cpp +++ b/test/qr_fullpivoting.cpp @@ -23,8 +23,8 @@ template void qr() MatrixType m1; createRandomPIMatrixOfRank(rank,rows,cols,m1); FullPivHouseholderQR qr(m1); - VERIFY(rank == qr.rank()); - VERIFY(cols - qr.rank() == qr.dimensionOfKernel()); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel()); VERIFY(!qr.isInjective()); VERIFY(!qr.isInvertible()); VERIFY(!qr.isSurjective()); diff --git a/test/ref.cpp b/test/ref.cpp index 18a89f4e9..1341dfef7 100644 --- a/test/ref.cpp +++ b/test/ref.cpp @@ -221,6 +221,12 @@ int test_ref_overload_fun1(Ref ) { return 3; } int test_ref_overload_fun2(Ref ) { return 4; } int test_ref_overload_fun2(Ref ) { return 5; } +void test_ref_ambiguous(const Ref &A, Ref B) +{ + B = A; + B = A - A; +} + // See also bug 969 void test_ref_overloads() { @@ -233,6 +239,9 @@ void test_ref_overloads() VERIFY( test_ref_overload_fun2(Ad)==4 ); VERIFY( test_ref_overload_fun2(Ad+Bd)==4 ); VERIFY( test_ref_overload_fun2(Af+Bf)==5 ); + + ArrayXd A, B; + test_ref_ambiguous(A, B); } void test_ref() diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 75f29a2b4..492b3a4f2 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -365,6 +365,20 @@ template void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m2, refMat2); } + // test diagonal to sparse + { + DenseVector d = DenseVector::Random(rows); + DenseMatrix refMat2 = d.asDiagonal(); + SparseMatrixType m2(rows, rows); + m2 = d.asDiagonal(); + VERIFY_IS_APPROX(m2, refMat2); + SparseMatrixType m3(d.asDiagonal()); + VERIFY_IS_APPROX(m3, refMat2); + refMat2 += d.asDiagonal(); + m2 += d.asDiagonal(); + VERIFY_IS_APPROX(m2, refMat2); + } + // test conservative resize { std::vector< std::pair > inc; diff --git a/test/sparse_solver.h b/test/sparse_solver.h index 1d3d25b53..64f6ac4fe 100644 --- a/test/sparse_solver.h +++ b/test/sparse_solver.h @@ -272,6 +272,7 @@ template void check_sparse_spd_solving(Solver& solver, int maxS DenseVector b = it.rhs(); DenseVector refX = it.refX(); PermutationMatrix pnull; + halfA.resize(A.rows(), A.cols()); if(Solver::UpLo == (Lower|Upper)) halfA = A; else diff --git a/test/sparseqr.cpp b/test/sparseqr.cpp index 8e6887dd3..50d1fcdf2 100644 --- a/test/sparseqr.cpp +++ b/test/sparseqr.cpp @@ -89,6 +89,11 @@ template void test_sparseqr_scalar() QtQ = Q * Q.adjoint(); idM.resize(Q.rows(), Q.rows()); idM.setIdentity(); VERIFY(idM.isApprox(QtQ)); + + // Q to dense + DenseMat dQ; + dQ = solver.matrixQ(); + VERIFY_IS_APPROX(Q, dQ); } void test_sparseqr() { diff --git a/test/svd_common.h b/test/svd_common.h index b06a8a0f2..d8611b541 100644 --- a/test/svd_common.h +++ b/test/svd_common.h @@ -33,6 +33,7 @@ void svd_check_full(const MatrixType& m, const SvdType& svd) }; typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; typedef Matrix MatrixUType; typedef Matrix MatrixVType; @@ -40,7 +41,10 @@ void svd_check_full(const MatrixType& m, const SvdType& svd) sigma.diagonal() = svd.singularValues().template cast(); MatrixUType u = svd.matrixU(); MatrixVType v = svd.matrixV(); - VERIFY_IS_APPROX(m, u * sigma * v.adjoint()); + RealScalar scaling = m.cwiseAbs().maxCoeff(); + if(scaling<=(std::numeric_limits::min)()) + scaling = RealScalar(1); + VERIFY_IS_APPROX(m/scaling, u * (sigma/scaling) * v.adjoint()); VERIFY_IS_UNITARY(u); VERIFY_IS_UNITARY(v); } @@ -307,6 +311,7 @@ void svd_inf_nan() // Regression test for bug 286: JacobiSVD loops indefinitely with some // matrices containing 
denormal numbers. +template void svd_underoverflow() { #if defined __INTEL_COMPILER @@ -384,6 +389,7 @@ void svd_all_trivial_2x2( void (*cb)(const MatrixType&,bool) ) } while((id void svd_preallocate() { Vector3f v(3.f, 2.f, 1.f); diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 54d3cc18b..3f9aa1026 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -59,8 +59,10 @@ #include "Eigen/Core" #include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h" @@ -80,6 +82,7 @@ #include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h" @@ -88,6 +91,7 @@ #include "unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h" diff --git a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h index 77b92ee9b..2244e40c2 100644 --- a/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h +++ b/unsupported/Eigen/CXX11/src/Core/util/EmulateCXX11Meta.h @@ -23,7 +23,7 @@ template class array { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } - static const std::size_t size = n; + static const std::size_t size() { return n; } T values[n]; diff --git a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h index 4dbdbfb3e..32936d4be 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/Tensor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -375,6 +375,28 @@ class Tensor : public TensorBase + EIGEN_DEVICE_FUNC + void resize(const Sizes& dimensions) { + array dims; + for (std::size_t i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#else + template + EIGEN_DEVICE_FUNC + void resize(const Sizes& dimensions) { + array dims; + for (std::size_t i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#endif + protected: bool checkIndexRange(const array& indices) const diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index c7cfbfce0..1c31b9d28 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -108,6 +108,12 @@ class TensorBase return unaryExpr(internal::scalar_inverse_op()); } + 
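  // A minimal usage sketch for the coefficient-wise tanh() added below (an
  // illustration only, assuming a rank-2 float tensor t):
  //   Eigen::Tensor<float, 2> t(2, 3);
  //   t.setRandom();
  //   Eigen::Tensor<float, 2> u = t.tanh();  // hyperbolic tangent applied to every coefficient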
EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + tanh() const { + return unaryExpr(internal::scalar_tanh_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> exp() const { @@ -295,11 +301,10 @@ class TensorBase return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::SumReducer()); } - const TensorReductionOp, const array, const Derived> + const TensorReductionOp, const DimensionList, const Derived> sum() const { - array in_dims; - for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i; - return TensorReductionOp, const array, const Derived>(derived(), in_dims, internal::SumReducer()); + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::SumReducer()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -308,11 +313,10 @@ class TensorBase return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MeanReducer()); } - const TensorReductionOp, const array, const Derived> + const TensorReductionOp, const DimensionList, const Derived> mean() const { - array in_dims; - for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i; - return TensorReductionOp, const array, const Derived>(derived(), in_dims, internal::MeanReducer()); + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MeanReducer()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -321,11 +325,10 @@ class TensorBase return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::ProdReducer()); } - const TensorReductionOp, const array, const Derived> + const TensorReductionOp, const DimensionList, const Derived> prod() const { - array in_dims; - for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i; - return TensorReductionOp, const array, const Derived>(derived(), in_dims, internal::ProdReducer()); + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::ProdReducer()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -334,11 +337,10 @@ class TensorBase return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MaxReducer()); } - const TensorReductionOp, const array, const Derived> + const TensorReductionOp, const DimensionList, const Derived> maximum() const { - array in_dims; - for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i; - return TensorReductionOp, const array, const Derived>(derived(), in_dims, internal::MaxReducer()); + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MaxReducer()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -347,11 +349,10 @@ class TensorBase return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MinReducer()); } - const TensorReductionOp, const array, const Derived> + const TensorReductionOp, const DimensionList, const Derived> minimum() const { - array in_dims; - for (int i = 0; i < NumDimensions; ++i) in_dims[i] = i; - return TensorReductionOp, const array, const Derived>(derived(), in_dims, internal::MinReducer()); + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -413,6 +414,26 @@ class TensorBase padding_type); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorVolumePatchOp + extract_volume_patches(const 
Index patch_planes, const Index patch_rows, const Index patch_cols, + const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1, + const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = 0) const { + return TensorVolumePatchOp(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value); + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorVolumePatchOp + extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, + const Index plane_stride, const Index row_stride, const Index col_stride, + const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride, + const Index padding_top_z, const Index padding_bottom_z, + const Index padding_top, const Index padding_bottom, + const Index padding_left, const Index padding_right, const Scalar padding_value = 0) const { + return TensorVolumePatchOp(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value); + } + // Morphing operators. EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorLayoutSwapOp @@ -460,6 +481,18 @@ class TensorBase return TensorStridingOp(derived(), strides); } + // Added support for custom unary and binary operations + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCustomUnaryOp customOp(const CustomUnaryFunc& op) const { + return TensorCustomUnaryOp(derived(), op); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCustomBinaryOp customOp(const OtherDerived& other, const CustomBinaryFunc& op) const { + return TensorCustomBinaryOp(derived(), other, op); + } + // Force the evaluation of the expression. 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorForcedEvalOp eval() const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h index 3b99ef069..2ef5ff205 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -106,8 +106,7 @@ class TensorChippingOp : public TensorBase > { typedef TensorAssignOp Assign; Assign assign(*this, other); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -117,8 +116,7 @@ class TensorChippingOp : public TensorBase > { typedef TensorAssignOp Assign; Assign assign(*this, other); - static const bool Vectorize = TensorEvaluator::PacketAccess; - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h index 6979fb4ec..759e8208f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -88,7 +88,7 @@ class TensorConcatenationOp : public TensorBase Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -98,7 +98,7 @@ class TensorConcatenationOp : public TensorBase Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -334,7 +334,7 @@ templatedimensions().TotalSize()); EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize]; - PacketReturnType rslt = internal::pstore(values, x); + internal::pstore(values, x); for (int i = 0; i < packetSize; ++i) { coeffRef(index+i) = values[i]; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 270383020..fd2f3abc4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -364,14 +364,6 @@ class TensorContractionInputMapper struct max_n_1 { - static const size_t size = n; -}; -template <> struct max_n_1<0> { - static const size_t size = 1; -}; - - template struct traits > { @@ -459,19 +451,6 @@ class TensorContractionOp : public TensorBase struct Cond {}; - -template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -const T1& choose(Cond, const T1& first, const T2&) { - return first; -} - -template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -const T2& choose(Cond, const T1&, const T2& second) { - return second; -} - - template struct TensorContractionEvaluatorBase { @@ -508,13 +487,13 @@ struct TensorContractionEvaluatorBase static const int RDims = internal::array_size::Dimensions>::value; static const unsigned int ContractDims = internal::array_size::value; - static const int NumDims = internal::max_n_1::size; + static const int NumDims = max_n_1::size; typedef array left_dim_mapper_t; typedef array right_dim_mapper_t; typedef array contract_t; - typedef array::size> left_nocontract_t; - typedef array::size> right_nocontract_t; + typedef array::size> left_nocontract_t; + typedef array::size> right_nocontract_t; typedef DSizes Dimensions; @@ -869,10 +848,10 @@ struct TensorEvaluator right_dim_mapper_t; typedef 
array contract_t; - typedef array::size> left_nocontract_t; - typedef array::size> right_nocontract_t; + typedef array::size> left_nocontract_t; + typedef array::size> right_nocontract_t; - static const int NumDims = internal::max_n_1::size; + static const int NumDims = max_n_1::size; // Could we use NumDimensions here? typedef DSizes Dimensions; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h index 588770bb4..f6bd949bd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -1241,10 +1241,10 @@ struct TensorEvaluator right_dim_mapper_t; typedef array contract_t; - typedef array::size> left_nocontract_t; - typedef array::size> right_nocontract_t; + typedef array::size> left_nocontract_t; + typedef array::size> right_nocontract_t; - static const int NumDims = internal::max_n_1::size; + static const int NumDims = max_n_1::size; typedef DSizes Dimensions; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index ed87d3100..57030229d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -93,10 +93,10 @@ struct TensorEvaluator right_dim_mapper_t; typedef array contract_t; - typedef array::size> left_nocontract_t; - typedef array::size> right_nocontract_t; + typedef array::size> left_nocontract_t; + typedef array::size> right_nocontract_t; - static const int NumDims = internal::max_n_1::size; + static const int NumDims = max_n_1::size; typedef DSizes Dimensions; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 6b8f71b96..88db9d410 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -510,7 +510,8 @@ struct TensorEvaluator EvalTo; EvalTo evalToTmp(local, m_kernelArg); - internal::TensorExecutor::PacketAccess>::run(evalToTmp, m_device); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; @@ -815,7 +816,8 @@ struct TensorEvaluator EvalTo; EvalTo evalToTmp(local, m_kernelArg); - internal::TensorExecutor::PacketAccess>::run(evalToTmp, m_device); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h new file mode 100644 index 000000000..0157f6fab --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -0,0 +1,310 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H +#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H + +namespace Eigen { + +/** \class TensorCustomUnaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor custom class. 
+ * + * + */ +namespace internal { +template +struct traits > +{ + typedef typename XprType::Scalar Scalar; + typedef typename packet_traits::type Packet; + typedef typename XprType::StorageKind StorageKind; + typedef typename XprType::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCustomUnaryOp& type; +}; + +template +struct nested > +{ + typedef TensorCustomUnaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCustomUnaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::Packet Packet; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func) + : m_expr(expr), m_func(func) {} + + EIGEN_DEVICE_FUNC + const CustomUnaryFunc& func() const { return m_func; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_expr; } + + protected: + typename XprType::Nested m_expr; + const CustomUnaryFunc m_func; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorCustomUnaryOp ArgType; + typedef typename internal::traits::Index Index; + static const int NumDims = internal::traits::NumDimensions; + typedef DSizes Dimensions; + typedef + typename internal::remove_const::type Scalar; + + enum { + IsAligned = false, + PacketAccess = (internal::packet_traits::size > 1), + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device) + : m_op(op), m_device(device), m_result(NULL) + { + m_dimensions = op.func().dimensions(op.expression()); + } + + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast( + m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + evalTo(m_result); + return true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } + + protected: + EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { + TensorMap > result( + data, m_dimensions); + m_op.func().eval(m_op.expression(), result, m_device); + } + + Dimensions m_dimensions; + const ArgType m_op; + const Device& m_device; + CoeffReturnType* 
m_result; +}; + + + +/** \class TensorCustomBinaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor custom class. + * + * + */ +namespace internal { +template +struct traits > +{ + typedef typename internal::promote_storage_type::ret Scalar; + typedef typename packet_traits::type Packet; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename internal::promote_storage_type::ret PacketReturnType; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCustomBinaryOp& type; +}; + +template +struct nested > +{ + typedef TensorCustomBinaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCustomBinaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::Packet Packet; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::traits::CoeffReturnType CoeffReturnType; + typedef typename internal::traits::PacketReturnType PacketReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func) + + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {} + + EIGEN_DEVICE_FUNC + const CustomBinaryFunc& func() const { return m_func; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const CustomBinaryFunc m_func; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorCustomBinaryOp XprType; + typedef typename internal::traits::Index Index; + static const int NumDims = internal::traits::NumDimensions; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + + enum { + IsAligned = false, + PacketAccess = (internal::packet_traits::size > 1), + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_op(op), m_device(device), m_result(NULL) + { + m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression()); + } + + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + evalTo(m_result); + return 
true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } + + protected: + EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { + TensorMap > result(data, m_dimensions); + m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); + } + + Dimensions m_dimensions; + const XprType m_op; + const Device& m_device; + CoeffReturnType* m_result; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h index 1018395a1..a4419c665 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h @@ -28,8 +28,25 @@ struct DefaultDevice { ::memset(buffer, c, n); } - EIGEN_STRONG_INLINE size_t numThreads() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { +#ifndef __CUDA_ARCH__ + // Running on the host CPU return 1; +#else + // Running on a CUDA device + return 32; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { +#ifndef __CUDA_ARCH__ + // Running single threaded on the host CPU + // Should return an enum that encodes the ISA supported by the CPU + return 1; +#else + // Running on a CUDA device + return __CUDA_ARCH__ / 100; +#endif } }; @@ -38,10 +55,19 @@ struct DefaultDevice { // We should really use a thread pool here but first we need to find a portable thread pool library. #ifdef EIGEN_USE_THREADS +// This defines an interface that ThreadPoolDevice can take to use +// custom thread pools underneath. +class ThreadPoolInterface { + public: + virtual void Schedule(std::function fn) = 0; + + virtual ~ThreadPoolInterface() {} +}; + // The implementation of the ThreadPool type ensures that the Schedule method // runs the functions it is provided in FIFO order when the scheduling is done // by a single thread. -class ThreadPool { +class ThreadPool : public ThreadPoolInterface { public: // Construct a pool that contains "num_threads" threads. explicit ThreadPool(int num_threads) { @@ -182,7 +208,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(Notification* n) { // Build a thread pool device on top the an existing pool of threads. struct ThreadPoolDevice { - ThreadPoolDevice(ThreadPool* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } + ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { return internal::aligned_malloc(num_bytes); @@ -204,6 +230,11 @@ struct ThreadPoolDevice { return num_threads_; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + // Should return an enum that encodes the ISA supported by the CPU + return 1; + } + template EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... 
args) const { Notification* n = new Notification(); @@ -219,7 +250,7 @@ struct ThreadPoolDevice { } private: - ThreadPool* pool_; + ThreadPoolInterface* pool_; size_t num_threads_; }; @@ -260,9 +291,12 @@ static inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { assert(status == cudaSuccess); } +// Cuda stream to use when no stream is specified explicitely. +static const cudaStream_t default_stream = cudaStreamDefault; + struct GpuDevice { // The cudastream is not owned: the caller is responsible for its initialization and eventual destruction. - GpuDevice(const cudaStream_t* stream) : stream_(stream) { eigen_assert(stream); } + GpuDevice(const cudaStream_t* stream = &default_stream) : stream_(stream) { eigen_assert(stream); } EIGEN_STRONG_INLINE const cudaStream_t& stream() const { return *stream_; } @@ -308,6 +342,8 @@ struct GpuDevice { return 32; } + inline int majorDeviceVersion() const { return m_deviceProperties.major; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { cudaStreamSynchronize(*stream_); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h new file mode 100644 index 000000000..9773afccf --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -0,0 +1,235 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H +#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H + +namespace Eigen { + +/** \internal + * + * \class TensorDimensionList + * \ingroup CXX11_Tensor_Module + * + * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n. 
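+ *
+ * A minimal usage sketch (illustrative only; assumes the two template
+ * parameters are the index type and the rank): listing every dimension of a
+ * rank-3 tensor so that a reduction collapses all of them:
+ * \code
+ * Eigen::Tensor<float, 3> t(2, 3, 4);
+ * t.setRandom();
+ * Eigen::DimensionList<Eigen::DenseIndex, 3> all_dims;  // behaves like {0, 1, 2}
+ * Eigen::Tensor<float, 0> total = t.sum(all_dims);      // full reduction
+ * \endcode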
+ * + * \sa Tensor + */ + +template struct DimensionList { + const Index operator[] (const Index i) const { return i; } +}; + +namespace internal { + +template struct array_size > { + static const size_t value = Rank; +}; +template struct array_size > { + static const size_t value = Rank; +}; + +template const Index array_get(DimensionList&) { + return n; +} +template const Index array_get(const DimensionList&) { + return n; +} + + +#if defined(EIGEN_HAS_CONSTEXPR) +template +struct index_known_statically > { + constexpr bool operator() (const DenseIndex) const { + return true; + } +}; +template +struct index_known_statically > { + constexpr bool operator() (const DenseIndex) const { + return true; + } +}; + +template +struct all_indices_known_statically > { + constexpr bool operator() () const { + return true; + } +}; +template +struct all_indices_known_statically > { + constexpr bool operator() () const { + return true; + } +}; + +template +struct indices_statically_known_to_increase > { + constexpr bool operator() () const { + return true; + } +}; +template +struct indices_statically_known_to_increase > { + constexpr bool operator() () const { + return true; + } +}; + +template +struct index_statically_eq > { + constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { + return i == value; + } +}; +template +struct index_statically_eq > { + constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { + return i == value; + } +}; + +template +struct index_statically_ne > { + constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { + return i != value; + } +}; +template +struct index_statically_ne > { + constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { + return i != value; + } +}; + +template +struct index_statically_gt > { + constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { + return i > value; + } +}; +template +struct index_statically_gt > { + constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { + return i > value; + } +}; + +template +struct index_statically_lt > { + constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { + return i < value; + } +}; +template +struct index_statically_lt > { + constexpr bool operator() (const DenseIndex i, const DenseIndex value) const { + return i < value; + } +}; + +#else +template +struct index_known_statically > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const { + return true; + } +}; +template +struct index_known_statically > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex) const { + return true; + } +}; + +template +struct all_indices_known_statically > { + EIGEN_ALWAYS_INLINE bool operator() () const { + return true; + } +}; +template +struct all_indices_known_statically > { + EIGEN_ALWAYS_INLINE bool operator() () const { + return true; + } +}; + +template +struct indices_statically_known_to_increase > { + EIGEN_ALWAYS_INLINE bool operator() () const { + return true; + } +}; +template +struct indices_statically_known_to_increase > { + EIGEN_ALWAYS_INLINE bool operator() () const { + return true; + } +}; + +template +struct index_statically_eq > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { + return false; + } +}; +template +struct index_statically_eq > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { + return false; + } +}; + +template +struct index_statically_ne > { + 
EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { + return false; + } +}; +template +struct index_statically_ne > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { + return false; + } +}; + +template +struct index_statically_gt > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { + return false; + } +}; +template +struct index_statically_gt > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { + return false; + } +}; + +template +struct index_statically_lt > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { + return false; + } +}; +template +struct index_statically_lt > { + EIGEN_ALWAYS_INLINE bool operator() (const DenseIndex, const DenseIndex) const { + return false; + } +}; +#endif + +} // end namespace internal +} // end namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 836daea65..5928f0b0c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -69,6 +69,31 @@ struct fixed_size_tensor_index_linearization_helper +struct fixed_size_tensor_index_extraction_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(const Index index, + const Dimensions& dimensions) + { + const Index mult = (index == n) ? 1 : 0; + return array_get(dimensions) * mult + + fixed_size_tensor_index_extraction_helper::run(index, dimensions); + } +}; + +template +struct fixed_size_tensor_index_extraction_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(const Index index, + const Dimensions& dimensions) + { + const Index mult = (index == 0) ? 
1 : 0; + return array_get<0>(dimensions) * mult; + } +}; + } // end namespace internal @@ -99,6 +124,10 @@ struct Sizes : internal::numeric_list { } #endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const int index) const { + return internal::fixed_size_tensor_index_extraction_helper::run(index, *this); + } + template Sizes& operator = (const T& /*other*/) { // add assertion failure if the size of other is different return *this; @@ -114,10 +143,12 @@ struct Sizes : internal::numeric_list { } }; +namespace internal { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes&) { return Sizes::total_size; } +} #else @@ -166,6 +197,24 @@ template ::value; + case 1: + return internal::get<1, Base>::value; + case 2: + return internal::get<2, Base>::value; + case 3: + return internal::get<3, Base>::value; + case 4: + return internal::get<4, Base>::value; + default: + eigen_assert(false && "index overflow"); + return static_cast(-1); + } + } + template Sizes& operator = (const T&) { // to do: check the size of other return *this; @@ -181,10 +230,12 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes&) { return Sizes::total_size; } +} #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 883e6cab1..ff4373f59 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -113,9 +113,9 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - m_impl.evalSubExprsIfNeeded(NULL); - return true; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) { + eigen_assert(scalar == NULL); + return m_impl.evalSubExprsIfNeeded(m_buffer); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 6ea588e4b..24606b0c8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -22,13 +22,8 @@ namespace Eigen { */ namespace internal { -template -struct IsVectorizable { - static const bool value = TensorEvaluator::PacketAccess; -}; - // Default strategy: the expression is evaluated with a single cpu thread. 
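+// The executor is normally reached through assignment: the operator=
+// overloads elsewhere in this patch wrap the expression in a TensorAssignOp
+// and run it on a device, roughly as follows (illustrative sketch;
+// Derived/OtherDerived stand for the concrete expression types):
+//
+//   typedef TensorAssignOp<Derived, const OtherDerived> Assign;
+//   Assign assign(*this, other);
+//   internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
+//
+// The Vectorizable default now comes from internal::IsVectorizable, which
+// this patch moves to TensorForwardDeclarations.h.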
-template::value> +template class TensorExecutor { public: @@ -198,10 +193,6 @@ EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) { } } -template -struct IsVectorizable { - static const bool value = TensorEvaluator::PacketAccess && TensorEvaluator::IsAligned; -}; template class TensorExecutor diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 41a36cb75..d253b70f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -116,7 +116,8 @@ struct TensorEvaluator, Device> } typedef TensorEvalToOp EvalTo; EvalTo evalToTmp(m_buffer, m_op); - internal::TensorExecutor::PacketAccess>::run(evalToTmp, m_device); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); m_impl.cleanup(); return true; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index a4224c372..b3bc16bc4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -29,6 +29,7 @@ template class TensorConversionOp; template class TensorConvolutionOp; template class TensorPatchOp; template class TensorImagePatchOp; +template class TensorVolumePatchOp; template class TensorBroadcastingOp; template class TensorChippingOp; template class TensorReshapingOp; @@ -41,14 +42,36 @@ template class TensorStridingOp; template class TensorGeneratorOp; template class TensorAssignOp; +template class TensorCustomUnaryOp; +template class TensorCustomBinaryOp; + template class TensorEvalToOp; template class TensorForcedEvalOp; template class TensorDevice; template struct TensorEvaluator; +class DefaultDevice; +class ThreadPoolDevice; +class GpuDevice; + namespace internal { -template class TensorExecutor; + +template +struct IsVectorizable { + static const bool value = TensorEvaluator::PacketAccess; +}; + +template +struct IsVectorizable { + static const bool value = TensorEvaluator::PacketAccess && + TensorEvaluator::IsAligned; +}; + +template ::value> +class TensorExecutor; + } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 1b031b7a1..33e8c01c2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -17,6 +17,7 @@ namespace internal { template struct SumReducer { static const bool PacketAccess = true; + static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { (*accum) += t; @@ -49,6 +50,8 @@ template struct SumReducer template struct MeanReducer { static const bool PacketAccess = true; + static const bool IsStateful = true; + MeanReducer() : scalarCount_(0), packetCount_(0) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { @@ -88,6 +91,7 @@ template struct MeanReducer template struct MaxReducer { static const bool PacketAccess = true; + static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { if (t > *accum) { *accum = t; } @@ -120,6 +124,7 @@ template struct MaxReducer template struct MinReducer { static const bool PacketAccess = true; + static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
void reduce(const T t, T* accum) const { if (t < *accum) { *accum = t; } @@ -153,6 +158,7 @@ template struct MinReducer template struct ProdReducer { static const bool PacketAccess = true; + static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { (*accum) *= t; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h index 054ecf7b5..ee66ae192 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -90,7 +90,7 @@ class TensorLayoutSwapOp : public TensorBase, WriteA { typedef TensorAssignOp Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -100,7 +100,7 @@ class TensorLayoutSwapOp : public TensorBase, WriteA { typedef TensorAssignOp Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h new file mode 100644 index 000000000..78feb85cd --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -0,0 +1,36 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_H + +namespace Eigen { + +template struct Cond {}; + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +const T1& choose(Cond, const T1& first, const T2&) { + return first; +} + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +const T2& choose(Cond, const T1&, const T2& second) { + return second; +} + +template struct max_n_1 { + static const size_t size = n; +}; +template <> struct max_n_1<0> { + static const size_t size = 1; +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index fa1e6931c..34be9b908 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -78,7 +78,7 @@ class TensorReshapingOp : public TensorBase Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -88,7 +88,7 @@ class TensorReshapingOp : public TensorBase Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -262,7 +262,7 @@ class TensorSlicingOp : public TensorBase Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -271,7 +271,7 @@ class TensorSlicingOp : public TensorBase Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -411,7 +411,7 @@ struct TensorEvaluator, Devi { const 
int packetSize = internal::unpacket_traits::size; EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); Index inputIndices[] = {0, 0}; Index indices[] = {index, index + packetSize - 1}; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 95116aaee..3b3955094 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -44,6 +44,38 @@ struct nested, 1, typename eval struct DimInitializer { + template EIGEN_DEVICE_FUNC + static void run(const InputDims& input_dims, + const array::value>& reduced, + OutputDims* output_dims, ReducedDims* reduced_dims) { + const int NumInputDims = internal::array_size::value; + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (reduced[i]) { + (*reduced_dims)[reduceIndex] = input_dims[i]; + ++reduceIndex; + } else { + (*output_dims)[outputIndex] = input_dims[i]; + ++outputIndex; + } + } + } +}; + +template <> struct DimInitializer > { + template EIGEN_DEVICE_FUNC + static void run(const InputDims& input_dims, const array&, + Sizes<1>*, array* reduced_dims) { + const int NumInputDims = internal::array_size::value; + for (int i = 0; i < NumInputDims; ++i) { + (*reduced_dims)[i] = input_dims[i]; + } + } +}; + + template struct are_inner_most_dims { static const bool value = false; @@ -144,7 +176,7 @@ template struct InnerMostDimPreserver { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); - for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) { + for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) { const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; InnerMostDimPreserver::reduce(self, input, reducer, accum); } @@ -154,13 +186,325 @@ struct InnerMostDimPreserver { template struct InnerMostDimPreserver<0, Self, Op, true> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { - for (int j = 0; j < self.m_reducedDims[0]; ++j) { + for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) { const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; reducer.reducePacket(self.m_impl.template packet(input), accum); } } }; +// Default full reducer +template +struct FullReducer { + static const bool HasOptimizedImplementation = false; + + static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) { + const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions()); + *output = InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + } +}; + + +#ifdef EIGEN_USE_THREADS +// Multithreaded full reducers +template +struct FullReducerShard { + static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) { + + shard->saccum = reducer.initialize(); + for (typename Eval::Index j = 0; j < numValuesToReduce; ++j) { + reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum); + } + } + + typename Eval::CoeffReturnType saccum; 
+}; + +template +struct FullReducerShard { + static void run(const Eval& eval, typename Eval::Index firstIndex, typename Eval::Index numValuesToReduce, Op& reducer, FullReducerShard* shard) { + + const int packetSize = internal::unpacket_traits::size; + const typename Eval::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; + + shard->paccum = reducer.template initializePacket(); + for (typename Eval::Index j = 0; j < VectorizedSize; j += packetSize) { + reducer.reducePacket(eval.m_impl.template packet(firstIndex + j), &shard->paccum); + } + shard->saccum = reducer.initialize(); + for (typename Eval::Index j = VectorizedSize; j < numValuesToReduce; ++j) { + reducer.reduce(eval.m_impl.coeff(firstIndex + j), &shard->saccum); + } + } + + typename Eval::PacketReturnType paccum; + typename Eval::CoeffReturnType saccum; +}; + + +template +struct FullReducer { + static const bool HasOptimizedImplementation = !Op::IsStateful; + + // launch one reducer per thread and accumulate the result. + static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) { + typedef typename Self::Index Index; + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + const Index blocksize = std::floor(static_cast(num_coeffs)/device.numThreads()); + const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; + eigen_assert(num_coeffs >= numblocks * blocksize); + + std::vector results; + results.reserve(numblocks); + std::vector > shards; + shards.resize(numblocks); + for (Index i = 0; i < numblocks; ++i) { + results.push_back(device.enqueue(&FullReducerShard::run, self, i*blocksize, blocksize, reducer, &shards[i])); + } + + FullReducerShard finalShard; + if (numblocks * blocksize < num_coeffs) { + FullReducerShard::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard); + } else { + finalShard.saccum = reducer.initialize(); + } + + for (Index i = 0; i < numblocks; ++i) { + wait_until_ready(results[i]); + delete results[i]; + } + + for (Index i = 0; i < numblocks; ++i) { + reducer.reduce(shards[i].saccum, &finalShard.saccum); + } + *output = reducer.finalize(finalShard.saccum); + } +}; + +template +struct FullReducer { + static const bool HasOptimizedImplementation = !Op::IsStateful; + + // launch one reducer per thread and accumulate the result. + static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, typename Self::CoeffReturnType* output) { + typedef typename Self::Index Index; + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + const Index blocksize = std::floor(static_cast(num_coeffs)/device.numThreads()); + const Index numblocks = blocksize > 0 ? 
num_coeffs / blocksize : 0; + eigen_assert(num_coeffs >= numblocks * blocksize); + + std::vector results; + results.reserve(numblocks); + std::vector > shards; + shards.resize(numblocks); + for (Index i = 0; i < numblocks; ++i) { + results.push_back(device.enqueue(&FullReducerShard::run, self, i*blocksize, blocksize, reducer, &shards[i])); + } + + FullReducerShard finalShard; + if (numblocks * blocksize < num_coeffs) { + FullReducerShard::run(self, numblocks * blocksize, num_coeffs - numblocks * blocksize, reducer, &finalShard); + } else { + finalShard.paccum = reducer.template initializePacket(); + finalShard.saccum = reducer.initialize(); + } + + for (Index i = 0; i < numblocks; ++i) { + wait_until_ready(results[i]); + delete results[i]; + } + + for (Index i = 0; i < numblocks; ++i) { + reducer.reducePacket(shards[i].paccum, &finalShard.paccum); + reducer.reduce(shards[i].saccum, &finalShard.saccum); + } + + *output = reducer.finalizeBoth(finalShard.saccum, finalShard.paccum); + } +}; +#endif + + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +// Full reducers for GPU, don't vectorize for now + +// Reducer function that enables multiple cuda thread to safely accumulate at the same +// output address. It basically reads the current value of the output variable, and +// attempts to update it with the new value. If in the meantime another cuda thread +// updated the content of the output address it will try again. +template +__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { +#if __CUDA_ARCH__ >= 300 + if (sizeof(T) == 4) + { + unsigned int oldval = *reinterpret_cast(output); + unsigned int newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned int readback; + while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else if (sizeof(T) == 8) { + unsigned long long oldval = *reinterpret_cast(output); + unsigned long long newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned long long readback; + while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else { + assert(0 && "Wordsize not supported"); + } +#else + assert(0 && "Shouldn't be called on unsupported device"); +#endif +} + +template +__device__ inline void atomicReduce(T* output, T accum, SumReducer&) { +#if __CUDA_ARCH__ >= 300 + atomicAdd(output, accum); +#else + assert(0 && "Shouldn't be called on unsupported device"); +#endif +} + +template +__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, + typename Self::CoeffReturnType* output) { + const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x; + + if (first_index == 0) { + *output = reducer.initialize(); + } + + typename Self::CoeffReturnType accum = reducer.initialize(); + for (Index i = 0; i < NumPerThread; ++i) { + const Index index = first_index + i * BlockSize; + if (index >= num_coeffs) { + break; + } + typename Self::CoeffReturnType val = input.m_impl.coeff(index); + reducer.reduce(val, &accum); + } + + for (int offset = warpSize/2; offset > 0; offset /= 2) { + reducer.reduce(__shfl_down(accum, offset), &accum); + } + 
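+  // After the __shfl_down tree above, each lane has folded the partial
+  // accumulators of the lanes `offset` positions above it into `accum`;
+  // lane 0 of every warp therefore holds the complete warp-level result and
+  // is the only lane that publishes it to global memory via atomicReduce().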
+ if ((threadIdx.x & (warpSize - 1)) == 0) { + atomicReduce(output, accum, reducer); + } +} + + +template +struct FullReducer { + // Unfortunately nvidia doesn't support well exotic types such as complex, + // so reduce the scope of the optimized version of the code to the simple case + // of floats. + static const bool HasOptimizedImplementation = !Op::IsStateful && + internal::is_same::value; + + template + static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { + assert(false && "Should only be called on floats"); + } + + static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output) { + typedef typename Self::Index Index; + + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + const int block_size = 256; + const int num_per_thread = 128; + const int num_blocks = std::ceil(static_cast(num_coeffs) / (block_size * num_per_thread)); + LAUNCH_CUDA_KERNEL((FullReductionKernel), + num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); + } +}; + +#endif + + +template +class BlockReducer { + public: + typedef typename Self::Index Index; + typedef typename Self::Scalar Scalar; + typedef typename Self::CoeffReturnType CoeffReturnType; + explicit BlockReducer(const Op& reducer) : op_(reducer) { + accum_ = op_.initialize(); + } + void Reduce(Index index, Index num_values_to_reduce, Scalar* data) { + for (Index i = 0; i < num_values_to_reduce; ++i) { + op_.reduce(data[index + i], &accum_); + } + } + CoeffReturnType Finalize() { + return op_.finalize(accum_); + } + + private: + CoeffReturnType accum_; + Op op_; +}; + + +template +class BlockReducer { + public: + typedef typename Self::Index Index; + typedef typename Self::Scalar Scalar; + typedef typename Self::CoeffReturnType CoeffReturnType; + typedef typename Self::PacketReturnType PacketReturnType; + explicit BlockReducer(const Op& reducer) : op_(reducer) { + vaccum_ = op_.template initializePacket(); + accum_ = op_.initialize(); + } + void Reduce(Index index, Index num_values_to_reduce, Scalar* data) { + const int packet_size = internal::unpacket_traits::size; + const typename Self::Index vectorized_size = (num_values_to_reduce / + packet_size) * packet_size; + for (typename Self::Index i = 0; i < vectorized_size; i += packet_size) { + op_.reducePacket(internal::ploadt( + &data[index + i]), &vaccum_); + } + + for (typename Self::Index i = vectorized_size; + i < num_values_to_reduce; ++i) { + op_.reduce(data[index + i], &accum_); + } + } + typename Self::CoeffReturnType Finalize() { + return op_.finalizeBoth(accum_, vaccum_); + } + + private: + typename Self::PacketReturnType vaccum_; + typename Self::CoeffReturnType accum_; + Op op_; +}; + } // end namespace internal @@ -179,6 +523,7 @@ class TensorReductionOp : public TensorBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims) { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer) { } @@ -186,6 +531,7 @@ class TensorReductionOp : public TensorBase const XprType& expression() const { return m_expr; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dims& dims() const { return m_dims; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op& reducer() const { return m_reducer; } protected: @@ -201,10 +547,11 @@ struct TensorEvaluator, Device> { typedef TensorReductionOp XprType; typedef typename XprType::Index Index; - static const 
int NumInputDims = internal::array_size::Dimensions>::value; + typedef typename TensorEvaluator::Dimensions InputDimensions; + static const int NumInputDims = internal::array_size::value; static const int NumReducedDims = internal::array_size::value; static const int NumOutputDims = (NumInputDims==NumReducedDims) ? 1 : NumInputDims - NumReducedDims; - typedef DSizes Dimensions; + typedef typename internal::conditional, DSizes >::type Dimensions; typedef typename XprType::Scalar Scalar; typedef TensorEvaluator, Device> Self; static const bool InputPacketAccess = TensorEvaluator::PacketAccess; @@ -218,9 +565,10 @@ struct TensorEvaluator, Device> static const bool ReducingInnerMostDims = internal::are_inner_most_dims::value; static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims::value; + static const bool RunningFullReduction = (NumInputDims==NumReducedDims); EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_reducer(op.reducer()) + : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) { EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), @@ -238,17 +586,7 @@ struct TensorEvaluator, Device> } const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); - int outputIndex = 0; - int reduceIndex = 0; - for (int i = 0; i < NumInputDims; ++i) { - if (reduced[i]) { - m_reducedDims[reduceIndex] = input_dims[i]; - ++reduceIndex; - } else { - m_dimensions[outputIndex] = input_dims[i]; - ++outputIndex; - } - } + internal::DimInitializer::run(input_dims, reduced, &m_dimensions, &m_reducedDims); // Precompute output strides. if (static_cast(Layout) == static_cast(ColMajor)) { @@ -277,8 +615,8 @@ struct TensorEvaluator, Device> } } - outputIndex = 0; - reduceIndex = 0; + int outputIndex = 0; + int reduceIndex = 0; for (int i = 0; i < NumInputDims; ++i) { if (reduced[i]) { m_reducedStrides[reduceIndex] = input_strides[i]; @@ -291,27 +629,50 @@ struct TensorEvaluator, Device> // Special case for full reductions if (NumInputDims == NumReducedDims) { - m_dimensions[0] = 1; + eigen_assert(m_dimensions[0] == 1); m_preservedStrides[0] = internal::array_prod(input_dims); } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename internal::remove_const::type PacketReturnType; + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { m_impl.evalSubExprsIfNeeded(NULL); + + // Use the FullReducer if possible. 
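+    // The optimized full reduction is taken only when the reducer reports an
+    // optimized implementation (i.e. it is stateless) and the dispatch is
+    // worthwhile: either we are on a GPU with compute capability >= 3.0
+    // (required by the __shfl_down based kernel), or the input holds more
+    // than 1024*1024 coefficients, which in practice is when the
+    // multithreaded sharding pays off.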
+ if (RunningFullReduction && internal::FullReducer::HasOptimizedImplementation && + ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || + (internal::array_prod(m_impl.dimensions()) > 1024 * 1024))) { + + bool need_assign = false; + if (!data) { + m_result = static_cast(m_device.allocate(sizeof(CoeffReturnType))); + data = m_result; + need_assign = true; + } + + Op reducer(m_reducer); + internal::FullReducer::run(*this, reducer, m_device, data); + return need_assign; + } return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); + if (m_result) { + m_device.deallocate(m_result); + } } - typedef typename internal::remove_const::type CoeffReturnType; - typedef typename internal::remove_const::type PacketReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + if (RunningFullReduction && m_result) { + return *m_result; + } Op reducer(m_reducer); if (ReducingInnerMostDims) { const Index num_values_to_reduce = @@ -372,6 +733,13 @@ struct TensorEvaluator, Device> template friend struct internal::GenericDimReducer; template friend struct internal::InnerMostDimReducer; template friend struct internal::InnerMostDimPreserver; + template friend struct internal::FullReducer; +#ifdef EIGEN_USE_THREADS + template friend struct internal::FullReducerShard; +#endif +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); +#endif // Returns the Index in the input tensor of the first value that needs to be // used to compute the reduction at output index "index". @@ -392,7 +760,12 @@ struct TensorEvaluator, Device> startInput += idx * m_preservedStrides[i]; index -= idx * m_outputStrides[i]; } - startInput += index * m_preservedStrides[0]; + if (PreservingInnerMostDims) { + eigen_assert(m_preservedStrides[0] == 1); + startInput += index; + } else { + startInput += index * m_preservedStrides[0]; + } } else { for (int i = 0; i < NumOutputDims - 1; ++i) { // This is index_i in the output tensor. @@ -400,7 +773,12 @@ struct TensorEvaluator, Device> startInput += idx * m_preservedStrides[i]; index -= idx * m_outputStrides[i]; } - startInput += index * m_preservedStrides[NumOutputDims - 1]; + if (PreservingInnerMostDims) { + eigen_assert(m_preservedStrides[NumOutputDims - 1] == 1); + startInput += index; + } else { + startInput += index * m_preservedStrides[NumOutputDims - 1]; + } } return startInput; } @@ -425,6 +803,16 @@ struct TensorEvaluator, Device> // Operation to apply for computing the reduction. 
Op m_reducer; + + // For full reductions +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + static const bool RunningOnGPU = internal::is_same::value; +#else + static const bool RunningOnGPU = false; +#endif + CoeffReturnType* m_result; + + const Device& m_device; }; } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h index 16bef2ad3..52f95b2a2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h @@ -80,7 +80,7 @@ class TensorReverseOp : public TensorBase Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -90,7 +90,7 @@ class TensorReverseOp : public TensorBase Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 1012ecd69..02f73dd37 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -78,7 +78,7 @@ class TensorShufflingOp : public TensorBase { typedef TensorAssignOp Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -88,7 +88,7 @@ class TensorShufflingOp : public TensorBase { typedef TensorAssignOp Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h index 00cb8e373..dd913fbae 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h @@ -78,7 +78,7 @@ class TensorStridingOp : public TensorBase > { typedef TensorAssignOp Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } @@ -88,7 +88,7 @@ class TensorStridingOp : public TensorBase > { typedef TensorAssignOp Assign; Assign assign(*this, other); - internal::TensorExecutor::run(assign, DefaultDevice()); + internal::TensorExecutor::run(assign, DefaultDevice()); return *this; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 757d6cb38..2e6b93bfb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -156,9 +156,9 @@ struct eval, Eigen::Dense> }; // TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector. -template struct nested -{ - typedef typename ref_selector::type type; +template struct nested +{ + typedef typename ref_selector::type type; }; template diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h new file mode 100644 index 000000000..00eabdf5a --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h @@ -0,0 +1,677 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
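+//
+// Illustrative usage sketch (assumes the extract_volume_patches() helper on
+// TensorBase that accompanies this op, and a ColMajor input laid out as
+// depth x planes x rows x cols x batch):
+//
+//   Eigen::Tensor<float, 5> input(3, 8, 8, 8, 16);
+//   input.setRandom();
+//   // 2x2x2 patches with unit strides:
+//   Eigen::Tensor<float, 6> patches = input.extract_volume_patches(2, 2, 2);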
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H +#define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H + +namespace Eigen { + +/** \class TensorVolumePatch + * \ingroup CXX11_Tensor_Module + * + * \brief Patch extraction specialized for processing of volumetric data. + * This assumes that the input has a least 4 dimensions ordered as follows: + * - channels + * - planes + * - rows + * - columns + * - (optional) additional dimensions such as time or batch size. + * Calling the volume patch code with patch_planes, patch_rows, and patch_cols + * is equivalent to calling the regular patch extraction code with parameters + * d, patch_planes, patch_rows, patch_cols, and 1 for all the additional + * dimensions. + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename internal::remove_const::type Scalar; + typedef traits XprTraits; + typedef typename packet_traits::type Packet; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions + 1; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorVolumePatchOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorVolumePatchOp type; +}; + +} // end namespace internal + +template +class TensorVolumePatchOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::internal::traits::Packet Packet; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + PaddingType padding_type, Scalar padding_value) + : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), + m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(false), m_padding_top_z(0), m_padding_bottom_z(0), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), + m_padding_type(padding_type), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + 
DenseIndex padding_top_z, DenseIndex padding_bottom_z, + DenseIndex padding_top, DenseIndex padding_bottom, + DenseIndex padding_left, DenseIndex padding_right, + Scalar padding_value) + : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides), + m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(true), m_padding_top_z(padding_top_z), m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom), + m_padding_left(padding_left), m_padding_right(padding_right), + m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC + DenseIndex patch_planes() const { return m_patch_planes; } + EIGEN_DEVICE_FUNC + DenseIndex patch_rows() const { return m_patch_rows; } + EIGEN_DEVICE_FUNC + DenseIndex patch_cols() const { return m_patch_cols; } + EIGEN_DEVICE_FUNC + DenseIndex plane_strides() const { return m_plane_strides; } + EIGEN_DEVICE_FUNC + DenseIndex row_strides() const { return m_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_strides() const { return m_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_plane_strides() const { return m_in_plane_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_row_strides() const { return m_in_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_col_strides() const { return m_in_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex plane_inflate_strides() const { return m_plane_inflate_strides; } + EIGEN_DEVICE_FUNC + DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } + EIGEN_DEVICE_FUNC + bool padding_explicit() const { return m_padding_explicit; } + EIGEN_DEVICE_FUNC + DenseIndex padding_top_z() const { return m_padding_top_z; } + EIGEN_DEVICE_FUNC + DenseIndex padding_bottom_z() const { return m_padding_bottom_z; } + EIGEN_DEVICE_FUNC + DenseIndex padding_top() const { return m_padding_top; } + EIGEN_DEVICE_FUNC + DenseIndex padding_bottom() const { return m_padding_bottom; } + EIGEN_DEVICE_FUNC + DenseIndex padding_left() const { return m_padding_left; } + EIGEN_DEVICE_FUNC + DenseIndex padding_right() const { return m_padding_right; } + EIGEN_DEVICE_FUNC + PaddingType padding_type() const { return m_padding_type; } + EIGEN_DEVICE_FUNC + Scalar padding_value() const { return m_padding_value; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const DenseIndex m_patch_planes; + const DenseIndex m_patch_rows; + const DenseIndex m_patch_cols; + const DenseIndex m_plane_strides; + const DenseIndex m_row_strides; + const DenseIndex m_col_strides; + const DenseIndex m_in_plane_strides; + const DenseIndex m_in_row_strides; + const DenseIndex m_in_col_strides; + const DenseIndex m_plane_inflate_strides; + const DenseIndex m_row_inflate_strides; + const DenseIndex m_col_inflate_strides; + const bool m_padding_explicit; + const DenseIndex m_padding_top_z; + const DenseIndex m_padding_bottom_z; + const DenseIndex m_padding_top; + const DenseIndex m_padding_bottom; + const DenseIndex m_padding_left; + const DenseIndex m_padding_right; + const PaddingType 
m_padding_type; + const Scalar m_padding_value; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorVolumePatchOp XprType; + typedef typename XprType::Index Index; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims + 1; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = NumDims == 6, + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + EIGEN_STATIC_ASSERT(NumDims >= 5, YOU_MADE_A_PROGRAMMING_MISTAKE); + + m_paddingValue = op.padding_value(); + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + // Cache a few variables. + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputDepth = input_dims[0]; + m_inputPlanes = input_dims[1]; + m_inputRows = input_dims[2]; + m_inputCols = input_dims[3]; + } else { + m_inputDepth = input_dims[NumInputDims-1]; + m_inputPlanes = input_dims[NumInputDims-2]; + m_inputRows = input_dims[NumInputDims-3]; + m_inputCols = input_dims[NumInputDims-4]; + } + + m_plane_strides = op.plane_strides(); + m_row_strides = op.row_strides(); + m_col_strides = op.col_strides(); + + // Input strides and effective input/patch size + m_in_plane_strides = op.in_plane_strides(); + m_in_row_strides = op.in_row_strides(); + m_in_col_strides = op.in_col_strides(); + m_plane_inflate_strides = op.plane_inflate_strides(); + m_row_inflate_strides = op.row_inflate_strides(); + m_col_inflate_strides = op.col_inflate_strides(); + + // The "effective" spatial size after inflating data with zeros. 
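+    // For an input extent n inflated with stride s, zeros are inserted
+    // between the original samples, giving an effective extent of
+    // (n - 1) * s + 1. Similarly, a patch extent p sampled with input
+    // stride d covers an effective window of p + (p - 1) * (d - 1) samples.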
+ m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1; + m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; + m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; + m_patch_planes_eff = op.patch_planes() + (op.patch_planes() - 1) * (m_in_plane_strides - 1); + m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); + m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); + + if (op.padding_explicit()) { + m_outputPlanes = numext::ceil((m_input_planes_eff + op.padding_top_z() + op.padding_bottom_z() - m_patch_planes_eff + 1.f) / static_cast(m_plane_strides)); + m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + m_planePaddingTop = op.padding_top_z(); + m_rowPaddingTop = op.padding_top(); + m_colPaddingLeft = op.padding_left(); + } else { + // Computing padding from the type + switch (op.padding_type()) { + case PADDING_VALID: + m_outputPlanes = numext::ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) / static_cast(m_plane_strides)); + m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + m_planePaddingTop = 0; + m_rowPaddingTop = 0; + m_colPaddingLeft = 0; + break; + case PADDING_SAME: { + m_outputPlanes = numext::ceil(m_input_planes_eff / static_cast(m_plane_strides)); + m_outputRows = numext::ceil(m_input_rows_eff / static_cast(m_row_strides)); + m_outputCols = numext::ceil(m_input_cols_eff / static_cast(m_col_strides)); + const Index dz = m_outputPlanes * m_plane_strides + m_patch_planes_eff - 1 - m_input_planes_eff; + const Index dy = m_outputRows * m_row_strides + m_patch_rows_eff - 1 - m_input_rows_eff; + const Index dx = m_outputCols * m_col_strides + m_patch_cols_eff - 1 - m_input_cols_eff; + m_planePaddingTop = dz - dz / 2; + m_rowPaddingTop = dy - dy / 2; + m_colPaddingLeft = dx - dx / 2; + break; + } + default: + eigen_assert(false && "unexpected padding"); + } + } + eigen_assert(m_outputRows > 0); + eigen_assert(m_outputCols > 0); + eigen_assert(m_outputPlanes > 0); + + // Dimensions for result of extraction. + if (static_cast(Layout) == static_cast(ColMajor)) { + // ColMajor + // 0: depth + // 1: patch_planes + // 2: patch_rows + // 3: patch_cols + // 4: number of patches + // 5 and beyond: anything else (such as batch). + m_dimensions[0] = input_dims[0]; + m_dimensions[1] = op.patch_planes(); + m_dimensions[2] = op.patch_rows(); + m_dimensions[3] = op.patch_cols(); + m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols; + for (int i = 5; i < NumDims; ++i) { + m_dimensions[i] = input_dims[i-1]; + } + } else { + // RowMajor + // NumDims-1: depth + // NumDims-2: patch_planes + // NumDims-3: patch_rows + // NumDims-4: patch_cols + // NumDims-5: number of patches + // NumDims-6 and beyond: anything else (such as batch). 
+ m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; + m_dimensions[NumDims-2] = op.patch_planes(); + m_dimensions[NumDims-3] = op.patch_rows(); + m_dimensions[NumDims-4] = op.patch_cols(); + m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols; + for (int i = NumDims-6; i >= 0; --i) { + m_dimensions[i] = input_dims[i]; + } + } + + // Strides for the output tensor. + if (static_cast(Layout) == static_cast(ColMajor)) { + m_rowStride = m_dimensions[1]; + m_colStride = m_dimensions[2] * m_rowStride; + m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0]; + m_otherStride = m_patchStride * m_dimensions[4]; + } else { + m_rowStride = m_dimensions[NumDims-2]; + m_colStride = m_dimensions[NumDims-3] * m_rowStride; + m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1]; + m_otherStride = m_patchStride * m_dimensions[NumDims-5]; + } + + // Strides for navigating through the input tensor. + m_planeInputStride = m_inputDepth; + m_rowInputStride = m_inputDepth * m_inputPlanes; + m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes; + m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes; + + m_outputPlanesRows = m_outputPlanes * m_outputRows; + + // Fast representations of different variables. + m_fastOtherStride = internal::TensorIntDivisor(m_otherStride); + m_fastPatchStride = internal::TensorIntDivisor(m_patchStride); + m_fastColStride = internal::TensorIntDivisor(m_colStride); + m_fastRowStride = internal::TensorIntDivisor(m_rowStride); + m_fastInputRowStride = internal::TensorIntDivisor(m_row_inflate_strides); + m_fastInputColStride = internal::TensorIntDivisor(m_col_inflate_strides); + m_fastInputPlaneStride = internal::TensorIntDivisor(m_plane_inflate_strides); + m_fastInputColsEff = internal::TensorIntDivisor(m_input_cols_eff); + m_fastOutputPlanes = internal::TensorIntDivisor(m_outputPlanes); + m_fastOutputPlanesRows = internal::TensorIntDivisor(m_outputPlanesRows); + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[0]); + } else { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[NumDims-1]); + } + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Patch index corresponding to the passed in index. + const Index patchIndex = index / m_fastPatchStride; + + // Spatial offset within the patch. This has to be translated into 3D + // coordinates within the patch. + const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; + + // Batch, etc. + const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride; + const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; + + // Calculate column index in the input original tensor. 
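+    // patch3DIndex enumerates the output patch positions with planes varying
+    // fastest, then rows, then columns, so the column position is recovered
+    // by dividing by outputPlanes * outputRows; the row and plane positions
+    // are then peeled off from the remainder below.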
+ const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; + const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); + if (inputCol < 0 || inputCol >= m_input_cols_eff || + ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { + return Scalar(m_paddingValue); + } + + // Calculate row index in the original input tensor. + const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; + const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride; + const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; + const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); + if (inputRow < 0 || inputRow >= m_input_rows_eff || + ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { + return Scalar(m_paddingValue); + } + + // Calculate plane index in the original input tensor. + const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex)); + const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride; + const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop; + const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0); + if (inputPlane < 0 || inputPlane >= m_input_planes_eff || + ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) { + return Scalar(m_paddingValue); + } + + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + + const Index inputIndex = depth + + origInputRow * m_rowInputStride + + origInputCol * m_colInputStride + + origInputPlane * m_planeInputStride + + otherIndex * m_otherInputStride; + + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const Index packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 || + m_in_plane_strides != 1 || m_plane_inflate_strides != 1) { + return packetWithPossibleZero(index); + } + + const Index indices[2] = {index, index + packetSize - 1}; + const Index patchIndex = indices[0] / m_fastPatchStride; + if (patchIndex != indices[1] / m_fastPatchStride) { + return packetWithPossibleZero(index); + } + const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride; + eigen_assert(otherIndex == indices[1] / m_fastOtherStride); + + // Find the offset of the element wrt the location of the first element. + const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, + (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; + + const Index patch3DIndex = (NumDims == 5) ? 
patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
+    eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);
+
+    const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
+    const Index colOffsets[2] = {
+      patchOffsets[0] / m_fastColStride,
+      patchOffsets[1] / m_fastColStride};
+
+    // Calculate col indices in the original input tensor.
+    const Index inputCols[2] = {
+      colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft,
+      colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
+    if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
+      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
+    }
+
+    if (inputCols[0] != inputCols[1]) {
+      return packetWithPossibleZero(index);
+    }
+
+    const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
+    const Index rowOffsets[2] = {
+      (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride,
+      (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride};
+    eigen_assert(rowOffsets[0] <= rowOffsets[1]);
+    // Calculate row indices in the original input tensor.
+    const Index inputRows[2] = {
+      rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop,
+      rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};
+
+    if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
+      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
+    }
+
+    if (inputRows[0] != inputRows[1]) {
+      return packetWithPossibleZero(index);
+    }
+
+    const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
+    const Index planeOffsets[2] = {
+      patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride,
+      patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride};
+    eigen_assert(planeOffsets[0] <= planeOffsets[1]);
+    const Index inputPlanes[2] = {
+      planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop,
+      planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop};
+
+    if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) {
+      return internal::pset1<PacketReturnType>(Scalar(m_paddingValue));
+    }
+
+    if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) {
+      // no padding
+      const int depth_index = static_cast<int>(Layout) == static_cast<int>(ColMajor) ?
0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + const Index inputIndex = depth + + inputRows[0] * m_rowInputStride + + inputCols[0] * m_colInputStride + + m_planeInputStride * inputPlanes[0] + + otherIndex * m_otherInputStride; + return m_impl.template packet(inputIndex); + } + + return packetWithPossibleZero(index); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + const TensorEvaluator& impl() const { return m_impl; } + + Index planePaddingTop() const { return m_planePaddingTop; } + Index rowPaddingTop() const { return m_rowPaddingTop; } + Index colPaddingLeft() const { return m_colPaddingLeft; } + Index outputPlanes() const { return m_outputPlanes; } + Index outputRows() const { return m_outputRows; } + Index outputCols() const { return m_outputCols; } + Index userPlaneStride() const { return m_plane_strides; } + Index userRowStride() const { return m_row_strides; } + Index userColStride() const { return m_col_strides; } + Index userInPlaneStride() const { return m_in_plane_strides; } + Index userInRowStride() const { return m_in_row_strides; } + Index userInColStride() const { return m_in_col_strides; } + Index planeInflateStride() const { return m_plane_inflate_strides; } + Index rowInflateStride() const { return m_row_inflate_strides; } + Index colInflateStride() const { return m_col_inflate_strides; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const + { + // ColMajor + // 0: depth, 1: patch_planes, 2: patch_rows, 3: patch_cols, 4: number of patches, 5: batches + // RowMajor + // 0: batches, 1: number of patches, 2: patch_cols , 3: patch_rows, 4: patch_planes, 5: depth + const Index patch3DIndex = coords[static_cast(Layout) == static_cast(ColMajor) ? 4 : 1]; + const Index colOffset = coords[static_cast(Layout) == static_cast(ColMajor) ? 3 : 2]; + const Index rowOffset= coords[static_cast(Layout) == static_cast(ColMajor) ? 2 : 3]; + const Index planeOffset = coords[static_cast(Layout) == static_cast(ColMajor) ? 1 : 4]; + + array inputCoords; + + const Index colIndex = patch3DIndex / m_fastOutputPlanesRows; + const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; + const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); + if (inputCol < 0 || inputCol >= m_input_cols_eff || + ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { + return Scalar(m_paddingValue); + } + + const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; + const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; + const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); + if (inputRow < 0 || inputRow >= m_input_rows_eff || + ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { + return Scalar(m_paddingValue); + } + + const Index planeIndex = patch3DIndex - colIndex * m_outputPlanesRows - rowIndex * m_outputRows; + const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop; + const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? 
(inputPlane / m_fastInputPlaneStride) : 0); + if (inputPlane < 0 || inputPlane >= m_input_planes_eff || + ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) { + return Scalar(m_paddingValue); + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + inputCoords[0] = coords[0]; // depth + inputCoords[1] = origInputPlane; + inputCoords[2] = origInputRow; + inputCoords[3] = origInputCol; + inputCoords[4] = coords[5]; // batch + } else { + inputCoords[4] = coords[5]; // depth + inputCoords[3] = origInputPlane; + inputCoords[2] = origInputRow; + inputCoords[1] = origInputCol; + inputCoords[0] = coords[0]; // batch + } + if (TensorEvaluator::CoordAccess) { + return m_impl.coeff(inputCoords); + } else { + Index inputIndex; + if (static_cast(Layout) == static_cast(ColMajor)) { + inputIndex = + inputCoords[4] * m_otherInputStride + + inputCoords[3] * m_colInputStride + + inputCoords[2] * m_rowInputStride + + inputCoords[1] * m_planeInputStride + + inputCoords[0]; + } else { + inputIndex = + inputCoords[0] * m_otherInputStride + + inputCoords[1] * m_colInputStride + + inputCoords[2] * m_rowInputStride + + inputCoords[3] * m_planeInputStride + + inputCoords[4]; + } + return m_impl.coeff(inputIndex); + } + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_DEFAULT typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + Dimensions m_dimensions; + + // Parameters passed to the costructor. + Index m_plane_strides; + Index m_row_strides; + Index m_col_strides; + + Index m_outputPlanes; + Index m_outputRows; + Index m_outputCols; + + Index m_planePaddingTop; + Index m_rowPaddingTop; + Index m_colPaddingLeft; + + Index m_in_plane_strides; + Index m_in_row_strides; + Index m_in_col_strides; + + Index m_plane_inflate_strides; + Index m_row_inflate_strides; + Index m_col_inflate_strides; + + // Cached input size. + Index m_inputDepth; + Index m_inputPlanes; + Index m_inputRows; + Index m_inputCols; + + // Other cached variables. + Index m_outputPlanesRows; + + // Effective input/patch post-inflation size. + Index m_input_planes_eff; + Index m_input_rows_eff; + Index m_input_cols_eff; + Index m_patch_planes_eff; + Index m_patch_rows_eff; + Index m_patch_cols_eff; + + // Strides for the output tensor. + Index m_otherStride; + Index m_patchStride; + Index m_rowStride; + Index m_colStride; + + // Strides for the input tensor. 
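+  // These reduce to m_planeInputStride = depth, m_rowInputStride = depth * planes,
+  // m_colInputStride = depth * planes * rows and m_otherInputStride =
+  // depth * planes * rows * cols (see the constructor above); they are exactly the
+  // factors used to rebuild inputIndex in coeff() and packet().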
+ Index m_planeInputStride; + Index m_rowInputStride; + Index m_colInputStride; + Index m_otherInputStride; + + internal::TensorIntDivisor m_fastOtherStride; + internal::TensorIntDivisor m_fastPatchStride; + internal::TensorIntDivisor m_fastColStride; + internal::TensorIntDivisor m_fastRowStride; + internal::TensorIntDivisor m_fastInputPlaneStride; + internal::TensorIntDivisor m_fastInputRowStride; + internal::TensorIntDivisor m_fastInputColStride; + internal::TensorIntDivisor m_fastInputColsEff; + internal::TensorIntDivisor m_fastOutputPlanesRows; + internal::TensorIntDivisor m_fastOutputPlanes; + internal::TensorIntDivisor m_fastOutputDepth; + + Scalar m_paddingValue; + + TensorEvaluator m_impl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index f438d4107..155bfcd76 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -125,6 +125,7 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_padding "-std=c++0x") ei_add_test(cxx11_tensor_patch "-std=c++0x") ei_add_test(cxx11_tensor_image_patch "-std=c++0x") + ei_add_test(cxx11_tensor_volume_patch "-std=c++0x") ei_add_test(cxx11_tensor_reduction "-std=c++0x") ei_add_test(cxx11_tensor_shuffling "-std=c++0x") ei_add_test(cxx11_tensor_striding "-std=c++0x") @@ -136,10 +137,12 @@ if(EIGEN_TEST_CXX11) ei_add_test(cxx11_tensor_layout_swap "-std=c++0x") ei_add_test(cxx11_tensor_io "-std=c++0x") ei_add_test(cxx11_tensor_generator "-std=c++0x") + ei_add_test(cxx11_tensor_custom_op "-std=c++0x") # These tests needs nvcc # ei_add_test(cxx11_tensor_device "-std=c++0x") # ei_add_test(cxx11_tensor_cuda "-std=c++0x") # ei_add_test(cxx11_tensor_contract_cuda "-std=c++0x") +# ei_add_test(cxx11_tensor_reduction_cuda "-std=c++0x") endif() diff --git a/unsupported/test/cxx11_meta.cpp b/unsupported/test/cxx11_meta.cpp index af5cadbf9..4f45e1dd3 100644 --- a/unsupported/test/cxx11_meta.cpp +++ b/unsupported/test/cxx11_meta.cpp @@ -354,7 +354,3 @@ void test_cxx11_meta() CALL_SUBTEST(test_array_zip_and_apply()); CALL_SUBTEST(test_array_misc()); } - -/* - * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; - */ diff --git a/unsupported/test/cxx11_tensor_custom_op.cpp b/unsupported/test/cxx11_tensor_custom_op.cpp new file mode 100644 index 000000000..7e33c9580 --- /dev/null +++ b/unsupported/test/cxx11_tensor_custom_op.cpp @@ -0,0 +1,107 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
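+//
+// The functors defined below follow the interface Tensor::customOp() expects:
+// dimensions() returns the shape of the result, and the device-templated eval()
+// writes the result through the supplied output expression. The unary form is
+// invoked as tensor.customOp(functor), the binary form as
+// tensor1.customOp(tensor2, functor).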
+ +#include "main.h" + +#include + +using Eigen::Tensor; + + +struct InsertZeros { + DSizes dimensions(const Tensor& input) const { + DSizes result; + result[0] = input.dimension(0) * 2; + result[1] = input.dimension(1) * 2; + return result; + } + + template + void eval(const Tensor& input, Output& output, const Device& device) const + { + array strides{{2, 2}}; + output.stride(strides).device(device) = input; + + Eigen::DSizes offsets(1,1); + Eigen::DSizes extents(output.dimension(0)-1, output.dimension(1)-1); + output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f); + } +}; + +static void test_custom_unary_op() +{ + Tensor tensor(3,5); + tensor.setRandom(); + + Tensor result = tensor.customOp(InsertZeros()); + VERIFY_IS_EQUAL(result.dimension(0), 6); + VERIFY_IS_EQUAL(result.dimension(1), 10); + + for (int i = 0; i < 6; i+=2) { + for (int j = 0; j < 10; j+=2) { + VERIFY_IS_EQUAL(result(i, j), tensor(i/2, j/2)); + } + } + for (int i = 1; i < 6; i+=2) { + for (int j = 1; j < 10; j+=2) { + VERIFY_IS_EQUAL(result(i, j), 0); + } + } +} + + +struct BatchMatMul { + DSizes dimensions(const Tensor& input1, const Tensor& input2) const { + DSizes result; + result[0] = input1.dimension(0); + result[1] = input2.dimension(1); + result[2] = input2.dimension(2); + return result; + } + + template + void eval(const Tensor& input1, const Tensor& input2, + Output& output, const Device& device) const + { + typedef Tensor::DimensionPair DimPair; + array dims({{DimPair(1, 0)}}); + for (int i = 0; i < output.dimension(2); ++i) { + output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims); + } + } +}; + + +static void test_custom_binary_op() +{ + Tensor tensor1(2,3,5); + tensor1.setRandom(); + Tensor tensor2(3,7,5); + tensor2.setRandom(); + + Tensor result = tensor1.customOp(tensor2, BatchMatMul()); + for (int i = 0; i < 5; ++i) { + typedef Tensor::DimensionPair DimPair; + array dims({{DimPair(1, 0)}}); + Tensor reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims); + TensorRef> val = result.chip<2>(i); + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(val(j, k), reference(j, k)); + } + } + } +} + + +void test_cxx11_tensor_custom_op() +{ + CALL_SUBTEST(test_custom_unary_op()); + CALL_SUBTEST(test_custom_binary_op()); +} diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cpp b/unsupported/test/cxx11_tensor_reduction_cuda.cpp new file mode 100644 index 000000000..a7eb7ac75 --- /dev/null +++ b/unsupported/test/cxx11_tensor_reduction_cuda.cpp @@ -0,0 +1,55 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
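+//
+// The test below follows the usual device-offload pattern: compute the reference
+// sum on the host, allocate device buffers through GpuDevice::allocate(), copy the
+// input with memcpyHostToDevice(), evaluate the reduction into a device-side
+// TensorMap via .device(gpu_device), copy the result back with memcpyDeviceToHost(),
+// synchronize, and compare against the host result.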
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda +#define EIGEN_USE_GPU + +#include "main.h" +#include + + +template +static void test_full_reductions() { + + Eigen::GpuDevice gpu_device; + + const int num_rows = internal::random(1024, 5*1024); + const int num_cols = internal::random(1024, 5*1024); + + Tensor in(num_rows, num_cols); + in.setRandom(); + + Tensor full_redux(1); + full_redux = in.sum(); + + std::size_t in_bytes = in.size() * sizeof(float); + std::size_t out_bytes = full_redux.size() * sizeof(float); + float* gpu_in_ptr = static_cast(gpu_device.allocate(in_bytes)); + float* gpu_out_ptr = static_cast(gpu_device.allocate(out_bytes)); + gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); + + TensorMap > in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap > out_gpu(gpu_out_ptr, 1); + + out_gpu.device(gpu_device) = in_gpu.sum(); + + Tensor full_redux_gpu(1); + gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); + gpu_device.synchronize(); + + // Check that the CPU and GPU reductions return the same result. + VERIFY_IS_APPROX(full_redux(0), full_redux_gpu(0)); +} + +void test_cxx11_tensor_reduction_cuda() { + CALL_SUBTEST(test_full_reductions()); + CALL_SUBTEST(test_full_reductions()); +} diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index 0a20a01a4..5ec7c8bf4 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -228,6 +228,29 @@ static void test_multithread_contraction_agrees_with_singlethread() { } +template +static void test_multithreaded_reductions() { + const int num_threads = internal::random(3, 11); + ThreadPool thread_pool(num_threads); + Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads); + + const int num_rows = internal::random(13, 732); + const int num_cols = internal::random(13, 732); + Tensor t1(num_rows, num_cols); + t1.setRandom(); + + Tensor full_redux(1); + full_redux = t1.sum(); + + Tensor full_redux_tp(1); + full_redux_tp.device(thread_pool_device) = t1.sum(); + + // Check that the single threaded and the multi threaded reductions return + // the same result. 
+ VERIFY_IS_APPROX(full_redux(0), full_redux_tp(0)); +} + + static void test_memcpy() { for (int i = 0; i < 5; ++i) { @@ -271,6 +294,9 @@ void test_cxx11_tensor_thread_pool() CALL_SUBTEST(test_contraction_corner_cases()); CALL_SUBTEST(test_contraction_corner_cases()); + CALL_SUBTEST(test_multithreaded_reductions()); + CALL_SUBTEST(test_multithreaded_reductions()); + CALL_SUBTEST(test_memcpy()); CALL_SUBTEST(test_multithread_random()); diff --git a/unsupported/test/cxx11_tensor_volume_patch.cpp b/unsupported/test/cxx11_tensor_volume_patch.cpp new file mode 100644 index 000000000..ca6840f3b --- /dev/null +++ b/unsupported/test/cxx11_tensor_volume_patch.cpp @@ -0,0 +1,112 @@ +#include "main.h" + +#include + +using Eigen::Tensor; + +static void test_single_voxel_patch() +{ + Tensor tensor(4,2,3,5,7); + tensor.setRandom(); + Tensor tensor_row_major = tensor.swap_layout(); + + Tensor single_voxel_patch; + single_voxel_patch = tensor.extract_volume_patches(1, 1, 1); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(0), 4); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(1), 1); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(2), 1); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(3), 1); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(4), 2 * 3 * 5); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(5), 7); + + Tensor single_voxel_patch_row_major; + single_voxel_patch_row_major = tensor_row_major.extract_volume_patches(1, 1, 1); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(0), 7); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(1), 2 * 3 * 5); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(2), 1); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(3), 1); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(4), 1); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(5), 4); + + for (int i = 0; i < tensor.size(); ++i) { + VERIFY_IS_EQUAL(tensor.data()[i], single_voxel_patch.data()[i]); + VERIFY_IS_EQUAL(tensor_row_major.data()[i], single_voxel_patch_row_major.data()[i]); + VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]); + } +} + + +static void test_entire_volume_patch() +{ + const int depth = 4; + const int patch_z = 2; + const int patch_y = 3; + const int patch_x = 5; + const int batch = 7; + + Tensor tensor(depth, patch_z, patch_y, patch_x, batch); + tensor.setRandom(); + Tensor tensor_row_major = tensor.swap_layout(); + + Tensor entire_volume_patch; + entire_volume_patch = tensor.extract_volume_patches(patch_z, patch_y, patch_x); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(0), depth); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(1), patch_z); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(2), patch_y); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(3), patch_x); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(4), patch_z * patch_y * patch_x); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(5), batch); + + Tensor entire_volume_patch_row_major; + entire_volume_patch_row_major = tensor_row_major.extract_volume_patches(patch_z, patch_y, patch_x); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(0), batch); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(2), patch_x); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(3), patch_y); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(4), patch_z); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(5), depth); + + const int dz = patch_z - 1; + const int dy = 
patch_y - 1; + const int dx = patch_x - 1; + + const int forward_pad_z = dz - dz / 2; + const int forward_pad_y = dy - dy / 2; + const int forward_pad_x = dx - dx / 2; + + for (int pz = 0; pz < patch_z; pz++) { + for (int py = 0; py < patch_y; py++) { + for (int px = 0; px < patch_x; px++) { + const int patchId = pz + patch_z * (py + px * patch_y); + for (int z = 0; z < patch_z; z++) { + for (int y = 0; y < patch_y; y++) { + for (int x = 0; x < patch_x; x++) { + for (int b = 0; b < batch; b++) { + for (int d = 0; d < depth; d++) { + float expected = 0.0f; + float expected_row_major = 0.0f; + const int eff_z = z - forward_pad_z + pz; + const int eff_y = y - forward_pad_y + py; + const int eff_x = x - forward_pad_x + px; + if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 && + eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) { + expected = tensor(d, eff_z, eff_y, eff_x, b); + expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d); + } + VERIFY_IS_EQUAL(entire_volume_patch(d, z, y, x, patchId, b), expected); + VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major); + } + } + } + } + } + } + } + } +} + +void test_cxx11_tensor_volume_patch() +{ + CALL_SUBTEST(test_single_voxel_patch()); + CALL_SUBTEST(test_entire_volume_patch()); +}
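+
+// A minimal usage sketch (illustrative only; this helper is not registered with
+// CALL_SUBTEST). With the default SAME padding and unit strides exercised above,
+// one patch is produced per input voxel, so the patch dimension equals
+// planes * rows * cols of the input regardless of the patch size.
+void example_volume_patch_count()
+{
+  Tensor<float, 5> input(3, 5, 6, 7, 2);  // depth, planes, rows, cols, batch
+  input.setRandom();
+  Tensor<float, 6> patches = input.extract_volume_patches(2, 2, 2);
+  VERIFY_IS_EQUAL(patches.dimension(4), 5 * 6 * 7);  // one patch per input voxel
+  VERIFY_IS_EQUAL(patches.dimension(0), 3);          // depth is preserved
+  VERIFY_IS_EQUAL(patches.dimension(5), 2);          // so is the batch dimension
+}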