Merged latest changes from upstream/eigen

Eugene Zhulenev 2018-08-01 11:59:04 -07:00
commit 385b3ff12f
61 changed files with 1319 additions and 357 deletions

View File

@@ -1,3 +1,4 @@
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "2000")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "2000")
+list(APPEND CTEST_CUSTOM_ERROR_EXCEPTION @EIGEN_CTEST_ERROR_EXCEPTION@)

View File

@@ -179,6 +179,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/NEON/PacketMath.h"
#include "src/Core/arch/NEON/MathFunctions.h"
#include "src/Core/arch/NEON/Complex.h"
+#include "src/Core/arch/NEON/TypeCasting.h"
#elif defined EIGEN_VECTORIZE_ZVECTOR
#include "src/Core/arch/ZVector/PacketMath.h"
#include "src/Core/arch/ZVector/MathFunctions.h"

View File

@@ -258,48 +258,39 @@ pexp<Packet8d>(const Packet8d& _x) {
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
psqrt<Packet16f>(const Packet16f& _x) {
-  _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
-  _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
-  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(flt_min, 0x00800000);
-  Packet16f neg_half = pmul(_x, p16f_minus_half);
-  // select only the inverse sqrt of positive normal inputs (denormals are
-  // flushed to zero and cause infs as well).
-  __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
-  Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
+  Packet16f neg_half = pmul(_x, pset1<Packet16f>(-.5f));
+  __mmask16 denormal_mask = _mm512_kand(
+      _mm512_cmp_ps_mask(_x, pset1<Packet16f>((std::numeric_limits<float>::min)()),
+                         _CMP_LT_OQ),
+      _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
+  Packet16f x = _mm512_rsqrt14_ps(_x);
  // Do a single step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
-  // Multiply the original _x by it's reciprocal square root to extract the
-  // square root.
-  return pmul(_x, x);
+  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet16f>(1.5f)));
+  // Flush results for denormals to zero.
+  return _mm512_mask_blend_ps(denormal_mask, pmul(_x,x), _mm512_setzero_ps());
}

template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
psqrt<Packet8d>(const Packet8d& _x) {
-  _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
-  _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
-  _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(dbl_min, 0x0010000000000000LL);
-  Packet8d neg_half = pmul(_x, p8d_minus_half);
-  // select only the inverse sqrt of positive normal inputs (denormals are
-  // flushed to zero and cause infs as well).
-  __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
-  Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
-  // Do a first step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
+  Packet8d neg_half = pmul(_x, pset1<Packet8d>(-.5f));
+  __mmask16 denormal_mask = _mm512_kand(
+      _mm512_cmp_pd_mask(_x, pset1<Packet8d>((std::numeric_limits<double>::min)()),
+                         _CMP_LT_OQ),
+      _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
+  Packet8d x = _mm512_rsqrt14_pd(_x);
+  // Do a single step of Newton's iteration.
+  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5f)));
  // Do a second step of Newton's iteration.
-  x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
-  // Multiply the original _x by it's reciprocal square root to extract the
-  // square root.
-  return pmul(_x, x);
+  x = pmul(x, pmadd(neg_half, pmul(x, x), pset1<Packet8d>(1.5f)));
+  return _mm512_mask_blend_pd(denormal_mask, pmul(_x,x), _mm512_setzero_pd());
}
#else
template <>
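
For reference, the Newton step used above refines an initial estimate y ≈ rsqrt(a) via y' = y * (1.5 - 0.5 * a * y * y), and sqrt(a) is then recovered as a * rsqrt(a). A minimal scalar sketch of the same scheme (illustrative only, not code from this commit):

#include <cmath>
#include <limits>

float psqrt_scalar(float a) {
  const float flt_min = (std::numeric_limits<float>::min)();
  bool denormal = (a < flt_min) && (a >= 0.0f);  // mirrors denormal_mask above
  float y = 1.0f / std::sqrt(a);                 // stands in for _mm512_rsqrt14_ps
  y = y * (-0.5f * a * (y * y) + 1.5f);          // one Newton step for rsqrt
  return denormal ? 0.0f : a * y;                // flush denormal inputs to zero
}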

View File

@@ -0,0 +1,48 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2018 Rasmus Munk Larsen <rmlarsen@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TYPE_CASTING_NEON_H
#define EIGEN_TYPE_CASTING_NEON_H
namespace Eigen {
namespace internal {
template <>
struct type_casting_traits<float, int> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template <>
struct type_casting_traits<int, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
return vcvtq_s32_f32(a);
}
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
return vcvtq_f32_s32(a);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TYPE_CASTING_NEON_H
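
A small usage sketch (assumes a NEON build; the float-to-int path truncates toward zero, as vcvtq_s32_f32 does):

using namespace Eigen::internal;
Packet4f f = pset1<Packet4f>(2.7f);
Packet4i i = pcast<Packet4f, Packet4i>(f); // {2, 2, 2, 2}
Packet4f g = pcast<Packet4i, Packet4f>(i); // {2.f, 2.f, 2.f, 2.f}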

View File

@@ -0,0 +1,104 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*****************************************************************
* InteropHeaders.h
*
* \brief:
* InteropHeaders
*
*****************************************************************/
#ifndef EIGEN_INTEROP_HEADERS_SYCL_H
#define EIGEN_INTEROP_HEADERS_SYCL_H
#if defined EIGEN_USE_SYCL
namespace Eigen {
namespace internal {
#define SYCL_PACKET_TRAITS(packet_type, val, unpacket_type, lengths)\
template<> struct packet_traits<unpacket_type> : default_packet_traits\
{\
typedef packet_type type;\
typedef packet_type half;\
enum {\
Vectorizable = 1,\
AlignedOnScalar = 1,\
size=lengths,\
HasHalfPacket = 0,\
HasDiv = 1,\
HasLog = 1,\
HasExp = 1,\
HasSqrt = 1,\
HasRsqrt = 1,\
HasSin = 1,\
HasCos = 1,\
HasTan = 1,\
HasASin = 1,\
HasACos = 1,\
HasATan = 1,\
HasSinh = 1,\
HasCosh = 1,\
HasTanh = 1,\
HasLGamma = 0,\
HasDiGamma = 0,\
HasZeta = 0,\
HasPolygamma = 0,\
HasErf = 0,\
HasErfc = 0,\
HasIGamma = 0,\
HasIGammac = 0,\
HasBetaInc = 0,\
HasBlend = val,\
HasMax=1,\
HasMin=1,\
HasMul=1,\
HasAdd=1,\
HasFloor=1,\
HasRound=1,\
HasLog1p=1,\
HasExpm1=1,\
HasCeil=1,\
};\
};
SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4)
SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, const float, 4)
SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2)
SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2)
#undef SYCL_PACKET_TRAITS
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
#define SYCL_ARITHMETIC(packet_type) template<> struct is_arithmetic<packet_type> { enum { value = true }; };
SYCL_ARITHMETIC(cl::sycl::cl_float4)
SYCL_ARITHMETIC(cl::sycl::cl_double2)
#undef SYCL_ARITHMETIC
#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\
template<> struct unpacket_traits<packet_type> {\
typedef unpacket_type type;\
enum {size=lengths, alignment=Aligned16};\
typedef packet_type half;\
};
SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2)
#undef SYCL_UNPACKET_TRAITS
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_USE_SYCL
#endif // EIGEN_INTEROP_HEADERS_SYCL_H
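
A hedged sketch of what these specializations buy: with EIGEN_USE_SYCL defined, Eigen's generic vectorization machinery resolves the packet type and width for float and double through the usual traits queries.

typedef Eigen::internal::packet_traits<float>::type FloatPacket;               // cl::sycl::cl_float4
enum { FloatPacketSize = Eigen::internal::unpacket_traits<FloatPacket>::size }; // 4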

View File

@@ -0,0 +1,221 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*****************************************************************
* MathFunctions.h
*
* \brief:
* MathFunctions
*
*****************************************************************/
#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H
#define EIGEN_MATH_FUNCTIONS_SYCL_H
namespace Eigen {
namespace internal {
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
//#if defined(__SYCL_DEVICE_ONLY__) && defined(EIGEN_USE_SYCL)
#define SYCL_PLOG(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type plog<packet_type>(const packet_type& a) { return cl::sycl::log(a); }
SYCL_PLOG(cl::sycl::cl_float4)
SYCL_PLOG(cl::sycl::cl_double2)
#undef SYCL_PLOG
#define SYCL_PLOG1P(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type plog1p<packet_type>(const packet_type& a) { return cl::sycl::log1p(a); }
SYCL_PLOG1P(cl::sycl::cl_float4)
SYCL_PLOG1P(cl::sycl::cl_double2)
#undef SYCL_PLOG1P
#define SYCL_PLOG10(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type plog10<packet_type>(const packet_type& a) { return cl::sycl::log10(a); }
SYCL_PLOG10(cl::sycl::cl_float4)
SYCL_PLOG10(cl::sycl::cl_double2)
#undef SYCL_PLOG10
#define SYCL_PEXP(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pexp<packet_type>(const packet_type& a) { return cl::sycl::exp(a); }
SYCL_PEXP(cl::sycl::cl_float4)
SYCL_PEXP(cl::sycl::cl_double2)
#undef SYCL_PEXP
#define SYCL_PEXPM1(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pexpm1<packet_type>(const packet_type& a) { return cl::sycl::expm1(a); }
SYCL_PEXPM1(cl::sycl::cl_float4)
SYCL_PEXPM1(cl::sycl::cl_double2)
#undef SYCL_PEXPM1
#define SYCL_PSQRT(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type psqrt<packet_type>(const packet_type& a) { return cl::sycl::sqrt(a); }
SYCL_PSQRT(cl::sycl::cl_float4)
SYCL_PSQRT(cl::sycl::cl_double2)
#undef SYCL_PSQRT
#define SYCL_PRSQRT(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type prsqrt<packet_type>(const packet_type& a) { return cl::sycl::rsqrt(a); }
SYCL_PRSQRT(cl::sycl::cl_float4)
SYCL_PRSQRT(cl::sycl::cl_double2)
#undef SYCL_PRSQRT
/** \internal \returns the sine of \a a (coeff-wise) */
#define SYCL_PSIN(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type psin<packet_type>(const packet_type& a) { return cl::sycl::sin(a); }
SYCL_PSIN(cl::sycl::cl_float4)
SYCL_PSIN(cl::sycl::cl_double2)
#undef SYCL_PSIN
/** \internal \returns the cosine of \a a (coeff-wise) */
#define SYCL_PCOS(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pcos<packet_type>(const packet_type& a) { return cl::sycl::cos(a); }
SYCL_PCOS(cl::sycl::cl_float4)
SYCL_PCOS(cl::sycl::cl_double2)
#undef SYCL_PCOS
/** \internal \returns the tangent of \a a (coeff-wise) */
#define SYCL_PTAN(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type ptan<packet_type>(const packet_type& a) { return cl::sycl::tan(a); }
SYCL_PTAN(cl::sycl::cl_float4)
SYCL_PTAN(cl::sycl::cl_double2)
#undef SYCL_PTAN
/** \internal \returns the arc sine of \a a (coeff-wise) */
#define SYCL_PASIN(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pasin<packet_type>(const packet_type& a) { return cl::sycl::asin(a); }
SYCL_PASIN(cl::sycl::cl_float4)
SYCL_PASIN(cl::sycl::cl_double2)
#undef SYCL_PASIN
/** \internal \returns the arc cosine of \a a (coeff-wise) */
#define SYCL_PACOS(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pacos<packet_type>(const packet_type& a) { return cl::sycl::acos(a); }
SYCL_PACOS(cl::sycl::cl_float4)
SYCL_PACOS(cl::sycl::cl_double2)
#undef SYCL_PACOS
/** \internal \returns the arc tangent of \a a (coeff-wise) */
#define SYCL_PATAN(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type patan<packet_type>(const packet_type& a) { return cl::sycl::atan(a); }
SYCL_PATAN(cl::sycl::cl_float4)
SYCL_PATAN(cl::sycl::cl_double2)
#undef SYCL_PATAN
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
#define SYCL_PSINH(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type psinh<packet_type>(const packet_type& a) { return cl::sycl::sinh(a); }
SYCL_PSINH(cl::sycl::cl_float4)
SYCL_PSINH(cl::sycl::cl_double2)
#undef SYCL_PSINH
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
#define SYCL_PCOSH(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pcosh<packet_type>(const packet_type& a) { return cl::sycl::cosh(a); }
SYCL_PCOSH(cl::sycl::cl_float4)
SYCL_PCOSH(cl::sycl::cl_double2)
#undef SYCL_PCOSH
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
#define SYCL_PTANH(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type ptanh<packet_type>(const packet_type& a) { return cl::sycl::tanh(a); }
SYCL_PTANH(cl::sycl::cl_float4)
SYCL_PTANH(cl::sycl::cl_double2)
#undef SYCL_PTANH
#define SYCL_PCEIL(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pceil<packet_type>(const packet_type& a) { return cl::sycl::ceil(a); }
SYCL_PCEIL(cl::sycl::cl_float4)
SYCL_PCEIL(cl::sycl::cl_double2)
#undef SYCL_PCEIL
#define SYCL_PROUND(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pround<packet_type>(const packet_type& a) { return cl::sycl::round(a); }
SYCL_PROUND(cl::sycl::cl_float4)
SYCL_PROUND(cl::sycl::cl_double2)
#undef SYCL_PROUND
#define SYCL_FLOOR(packet_type) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pfloor<packet_type>(const packet_type& a) { return cl::sycl::floor(a); }
SYCL_FLOOR(cl::sycl::cl_float4)
SYCL_FLOOR(cl::sycl::cl_double2)
#undef SYCL_FLOOR
#define SYCL_PMIN(packet_type, expr) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pmin<packet_type>(const packet_type& a, const packet_type& b) { return expr; }
SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b))
SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b))
#undef SYCL_PMIN
#define SYCL_PMAX(packet_type, expr) \
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
packet_type pmax<packet_type>(const packet_type& a, const packet_type& b) { return expr; }
SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b))
SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b))
#undef SYCL_PMAX
//#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_SYCL_H
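
As a usage sketch (hypothetical device-side call; each specialization simply forwards to the corresponding cl::sycl built-in):

cl::sycl::cl_float4 v(1.f, 4.f, 9.f, 16.f);
cl::sycl::cl_float4 r = Eigen::internal::psqrt<cl::sycl::cl_float4>(v); // {1, 2, 3, 4}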

View File

@@ -0,0 +1,458 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*****************************************************************
* PacketMath.h
*
* \brief:
* PacketMath
*
*****************************************************************/
#ifndef EIGEN_PACKET_MATH_SYCL_H
#define EIGEN_PACKET_MATH_SYCL_H
#include <type_traits>
#if defined EIGEN_USE_SYCL
namespace Eigen {
namespace internal {
#define SYCL_PLOADT_RO(address_space_target)\
template<typename packet_type, int Alignment>\
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
ploadt_ro(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
cl::sycl::access::address_space::address_space_target>::pointer_t from) {\
typedef typename unpacket_traits<packet_type>::type scalar;\
typedef cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target> multi_ptr;\
auto res=packet_type(static_cast<typename unpacket_traits<packet_type>::type>(0));\
res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from)));\
return res;\
}
SYCL_PLOADT_RO(global_space)
SYCL_PLOADT_RO(local_space)
#undef SYCL_PLOADT_RO
#define SYCL_PLOAD(address_space_target, Alignment, AlignedType)\
template<typename packet_type> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
pload##AlignedType(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
cl::sycl::access::address_space::address_space_target>::pointer_t from) {\
return ploadt_ro<packet_type, Alignment>(from);\
}
// global space
SYCL_PLOAD(global_space, Unaligned, u)
SYCL_PLOAD(global_space, Aligned, )
// local space
SYCL_PLOAD(local_space, Unaligned, u)
SYCL_PLOAD(local_space, Aligned, )
// private space
//SYCL_PLOAD(private_space, Unaligned, u)
//SYCL_PLOAD(private_space, Aligned, )
#undef SYCL_PLOAD
/** \internal \returns a packet version of \a *from.
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
#define SYCL_PLOADT(address_space_target)\
template<typename packet_type, int Alignment>\
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(\
typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
cl::sycl::access::address_space::address_space_target>::pointer_t from)\
{\
if(Alignment >= unpacket_traits<packet_type>::alignment)\
return pload<packet_type>(from);\
else\
return ploadu<packet_type>(from);\
}
// global space
SYCL_PLOADT(global_space)
// local space
SYCL_PLOADT(local_space)
//private_space
// There is no need to specialise it for private space as it can use the GenericPacketMath version
#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)\
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
ploadt_ro<packet_type, Alignment>(const typename unpacket_traits<packet_type>::type * from) { \
typedef typename unpacket_traits<packet_type>::type scalar;\
auto res=packet_type(static_cast<scalar>(0));\
res. template load<cl::sycl::access::address_space::private_space>(0, const_cast<scalar*>(from));\
return res;\
}
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)\
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
pload##alignment_type(const typename unpacket_traits<packet_type>::type * from) { \
typedef typename unpacket_traits<packet_type>::type scalar;\
auto res=packet_type(static_cast<scalar>(0));\
res. template load<cl::sycl::access::address_space::private_space>(0, const_cast<scalar*>(from));\
return res;\
}
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4,)
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2,)
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
#undef SYCL_PLOAD_SPECIAL
#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)\
template<>\
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
typename cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target>::pointer_t to, \
const packet_type& from) {\
typedef cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target> multi_ptr;\
from.store(0, multi_ptr(to));\
}
// global space
SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
#define SYCL_PSTORE_T(scalar, packet_type, Alignment)\
template<>\
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret<scalar, packet_type, Alignment>(\
scalar* to,\
const packet_type& from) {\
if(Alignment)\
pstore(to, from);\
else\
pstoreu(to,from);\
}
SYCL_PSTORE_T(float, cl::sycl::cl_float4, Aligned)
SYCL_PSTORE_T(float, cl::sycl::cl_float4, Unaligned)
SYCL_PSTORE_T(double, cl::sycl::cl_double2, Aligned)
SYCL_PSTORE_T(double, cl::sycl::cl_double2, Unaligned)
#undef SYCL_PSTORE_T
#define SYCL_PSET1(packet_type)\
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>(\
const typename unpacket_traits<packet_type>::type& from) {\
return packet_type(from);\
}
// global space
SYCL_PSET1(cl::sycl::cl_float4)
SYCL_PSET1(cl::sycl::cl_double2)
#undef SYCL_PSET1
template <typename packet_type> struct get_base_packet {
template <typename sycl_multi_pointer>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer ) {}
template <typename sycl_multi_pointer>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer , Index ) {}
};
template <> struct get_base_packet <cl::sycl::cl_float4> {
template <typename sycl_multi_pointer>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
}
template <typename sycl_multi_pointer>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
return cl::sycl::cl_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
}
template <typename sycl_multi_pointer>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_float4& from, Index stride) {
auto tmp = stride;
to[0] = from.x();
to[tmp] = from.y();
to[tmp += stride] = from.z();
to[tmp += stride] = from.w();
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a+1), static_cast<float>(a+2), static_cast<float>(a+3));
}
};
template <> struct get_base_packet <cl::sycl::cl_double2> {
template <typename sycl_multi_pointer>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
return cl::sycl::cl_double2(from[0], from[0]);
}
template <typename sycl_multi_pointer, typename Index>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from, Index stride) {
return cl::sycl::cl_double2(from[0*stride], from[1*stride]);
}
template <typename sycl_multi_pointer>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_double2& from, Index stride) {
to[0] = from.x();
to[stride] = from.y();
}
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
}
};
#define SYCL_PLOAD_DUP(address_space_target)\
template<typename packet_type> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
ploaddup(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
cl::sycl::access::address_space::address_space_target>::pointer_t from)\
{\
return get_base_packet<packet_type>::get_ploaddup(from); \
}
// global space
SYCL_PLOAD_DUP(global_space)
// local_space
SYCL_PLOAD_DUP(local_space)
// private_space
//SYCL_PLOAD_DUP(private_space)
#undef SYCL_PLOAD_DUP
#define SYCL_PLOAD_DUP_SPECILIZE(packet_type)\
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
ploaddup<packet_type>(const typename unpacket_traits<packet_type>::type * from)\
{ \
return get_base_packet<packet_type>::get_ploaddup(from); \
}
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
#undef SYCL_PLOAD_DUP_SPECILIZE
#define SYCL_PLSET(packet_type)\
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>(const typename unpacket_traits<packet_type>::type& a) {\
return get_base_packet<packet_type>::set_plset(a);\
}
SYCL_PLSET(cl::sycl::cl_float4)
SYCL_PLSET(cl::sycl::cl_double2)
#undef SYCL_PLSET
#define SYCL_PGATHER(address_space_target)\
template<typename Scalar, typename packet_type> EIGEN_DEVICE_FUNC inline packet_type pgather(\
typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
cl::sycl::access::address_space::address_space_target>::pointer_t from, Index stride) {\
return get_base_packet<packet_type>::get_pgather(from, stride); \
}
// global space
SYCL_PGATHER(global_space)
// local space
SYCL_PGATHER(local_space)
// private space
//SYCL_PGATHER(private_space)
#undef SYCL_PGATHER
#define SYCL_PGATHER_SPECILIZE(scalar, packet_type)\
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
pgather<scalar, packet_type>(const typename unpacket_traits<packet_type>::type * from, Index stride)\
{ \
return get_base_packet<packet_type>::get_pgather(from, stride); \
}
SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
#undef SYCL_PGATHER_SPECILIZE
#define SYCL_PSCATTER(address_space_target)\
template<typename Scalar, typename packet_type> EIGEN_DEVICE_FUNC inline void pscatter(\
typename cl::sycl::multi_ptr<typename unpacket_traits<packet_type>::type,\
cl::sycl::access::address_space::address_space_target>::pointer_t to,\
const packet_type& from, Index stride) {\
get_base_packet<packet_type>::set_pscatter(to, from, stride);\
}
// global space
SYCL_PSCATTER(global_space)
// local space
SYCL_PSCATTER(local_space)
// private space
//SYCL_PSCATTER(private_space)
#undef SYCL_PSCATTER
#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)\
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void \
pscatter<scalar, packet_type>(typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride)\
{ \
get_base_packet<packet_type>::set_pscatter(to, from, stride);\
}
SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
#undef SYCL_PSCATTER_SPECILIZE
#define SYCL_PMAD(packet_type)\
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( const packet_type& a,\
const packet_type& b, const packet_type& c){\
return cl::sycl::mad(a,b,c);\
}
SYCL_PMAD(cl::sycl::cl_float4)
SYCL_PMAD(cl::sycl::cl_double2)
#undef SYCL_PMAD
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
return a.x();
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
return a.x();
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
return a.x() + a.y() + a.z() + a.w();
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
return a.x() + a.y();
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
return cl::sycl::fmax(a.x(), a.y());
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
return cl::sycl::fmin(a.x(), a.y());
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
return a.x() * a.y() * a.z() * a.w();
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
return a.x() * a.y();
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
ptranspose(PacketBlock<cl::sycl::cl_float4,4>& kernel) {
float tmp = kernel.packet[0].y();
kernel.packet[0].y() = kernel.packet[1].x();
kernel.packet[1].x() = tmp;
// std::swap(kernel.packet[0].y(), kernel.packet[1].x());
tmp = kernel.packet[0].z();
kernel.packet[0].z() = kernel.packet[2].x();
kernel.packet[2].x() = tmp;
//std::swap(kernel.packet[0].z(), kernel.packet[2].x());
tmp = kernel.packet[0].w();
kernel.packet[0].w() = kernel.packet[3].x();
kernel.packet[3].x() = tmp;
//std::swap(kernel.packet[0].w(), kernel.packet[3].x());
tmp = kernel.packet[1].z();
kernel.packet[1].z() = kernel.packet[2].y();
kernel.packet[2].y() = tmp;
// std::swap(kernel.packet[1].z(), kernel.packet[2].y());
tmp = kernel.packet[1].w();
kernel.packet[1].w() = kernel.packet[3].y();
kernel.packet[3].y() = tmp;
// std::swap(kernel.packet[1].w(), kernel.packet[3].y());
tmp = kernel.packet[2].w();
kernel.packet[2].w() = kernel.packet[3].z();
kernel.packet[3].z() = tmp;
// std::swap(kernel.packet[2].w(), kernel.packet[3].z());
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
ptranspose(PacketBlock<cl::sycl::cl_double2,2>& kernel) {
double tmp = kernel.packet[0].y();
kernel.packet[0].y() = kernel.packet[1].x();
kernel.packet[1].x() = tmp;
//std::swap(kernel.packet[0].y(), kernel.packet[1].x());
}
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
pblend(const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
const cl::sycl::cl_float4& thenPacket, const cl::sycl::cl_float4& elsePacket) {
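// cl::sycl::select returns elsePacket wherever the condition's MSB is set,
// hence a true selector maps to 0 (keep thenPacket) and false maps to -1.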
cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1,
ifPacket.select[1] ? 0 : -1,
ifPacket.select[2] ? 0 : -1,
ifPacket.select[3] ? 0 : -1);
return cl::sycl::select(thenPacket, elsePacket, condition);
}
template<> inline cl::sycl::cl_double2
pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
ifPacket.select[1] ? 0 : -1);
return cl::sycl::select(thenPacket, elsePacket, condition);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_USE_SYCL
#endif // EIGEN_PACKET_MATH_SYCL_H

View File

@@ -0,0 +1,89 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Mehdi Goli Codeplay Software Ltd.
// Ralph Potter Codeplay Software Ltd.
// Luke Iwanski Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*****************************************************************
* TypeCasting.h
*
* \brief:
* TypeCasting
*
*****************************************************************/
#ifndef EIGEN_TYPE_CASTING_SYCL_H
#define EIGEN_TYPE_CASTING_SYCL_H
namespace Eigen {
namespace internal {
#ifdef __SYCL_DEVICE_ONLY__
template <>
struct type_casting_traits<float, int> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
return a. template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
}
template <>
struct type_casting_traits<int, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 1
};
};
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
return a. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
}
template <>
struct type_casting_traits<double, float> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 2,
TgtCoeffRatio = 1
};
};
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
auto a1=a. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
auto b1=b. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());
}
template <>
struct type_casting_traits<float, double> {
enum {
VectorizedCast = 1,
SrcCoeffRatio = 1,
TgtCoeffRatio = 2
};
};
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
// Simply discard the second half of the input
return cl::sycl::cl_double2(a.x(), a.y());
}
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TYPE_CASTING_SYCL_H

View File

@@ -972,7 +972,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
internal::prefetch(blA+(3*K+16)*LhsProgress); \
-if (EIGEN_ARCH_ARM) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \
+if (EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) { internal::prefetch(blB+(4*K+16)*RhsProgress); } /* Bug 953 */ \
traits.loadLhs(&blA[(0+3*K)*LhsProgress], A0); \
traits.loadLhs(&blA[(1+3*K)*LhsProgress], A1); \
traits.loadLhs(&blA[(2+3*K)*LhsProgress], A2); \

View File

@@ -518,6 +518,8 @@
#endif

// Does the compiler support C99?
+// Need to include <cmath> to make sure _GLIBCXX_USE_C99 gets defined
+#include <cmath>
#ifndef EIGEN_HAS_C99_MATH
#if EIGEN_MAX_CPP_VER>=11 && \
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \

@@ -1074,4 +1076,17 @@ namespace Eigen {
# endif
#endif

+#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
+// The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input.
+namespace Eigen {
+namespace internal {
+
+bool all(){ return true; }
+
+template<typename T, typename ...Ts>
+bool all(T t, Ts ... ts){ return t && all(ts...); }
+
+}
+}
+#endif
+
#endif // EIGEN_MACROS_H
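
A hedged sketch of the intended use (hypothetical call site, not part of the diff): the parameter pack folds into a chain of &&, so a single assert can guard several conditions at once.

bool ok(int rows, int cols, const float* data) {
  // equivalent to (rows > 0) && (cols > 0) && (data != 0)
  return Eigen::internal::all(rows > 0, cols > 0, data != 0);
}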

View File

@@ -19,6 +19,7 @@ include(CTest)
set(EIGEN_TEST_BUILD_FLAGS "" CACHE STRING "Options passed to the build command of unit tests")
set(EIGEN_DASHBOARD_BUILD_TARGET "buildtests" CACHE STRING "Target to be built in dashboard mode, default is buildtests")
+set(EIGEN_CTEST_ERROR_EXCEPTION "" CACHE STRING "Regular expression for build error messages to be filtered out")

# Overwrite default DartConfiguration.tcl such that ctest can build our unit tests.
# Recall that our unit tests are not in the "all" target, so we have to explicitly ask ctest to build our custom 'buildtests' target.

View File

@@ -79,7 +79,7 @@ These examples are just intended to give the reader a first impression of how fu
\section TopicUsingRefClass How to write generic, but non-templated function?

-In all the previous examples, the functions had to be template functions. This approach allows to write very generic code, but it is often desirable to write non templated function and still keep some level of genericity to avoid stupid copies of the arguments. The typical example is to write functions accepting both a MatrixXf or a block of a MatrixXf. This exactly the purpose of the Ref class. Here is a simple example:
+In all the previous examples, the functions had to be template functions. This approach allows to write very generic code, but it is often desirable to write non templated functions and still keep some level of genericity to avoid stupid copies of the arguments. The typical example is to write functions accepting both a MatrixXf or a block of a MatrixXf. This is exactly the purpose of the Ref class. Here is a simple example:

<table class="example">
<tr><th>Example:</th><th>Output:</th></tr>
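
The table's snippet is not reproduced here; a minimal sketch of the Ref pattern it illustrates (assumed example, not the one from the docs):

#include <Eigen/Core>
using namespace Eigen;

// One non-template function that accepts a MatrixXf, a block of one, a Map, ...
float sum_of(const Ref<const MatrixXf>& m) { return m.sum(); }

int main() {
  MatrixXf A = MatrixXf::Ones(4, 4);
  float s1 = sum_of(A);                   // whole matrix, no copy
  float s2 = sum_of(A.block(1, 1, 2, 2)); // block, still no temporary
  return (s1 == 16.f && s2 == 4.f) ? 0 : 1;
}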

View File

@@ -3,5 +3,5 @@ m << 1, 0,
1, 1;
cout << "Comparing m with identity matrix:" << endl;
cout << m.cwiseEqual(MatrixXi::Identity(2,2)) << endl;
-int count = m.cwiseEqual(MatrixXi::Identity(2,2)).count();
+Index count = m.cwiseEqual(MatrixXi::Identity(2,2)).count();
cout << "Number of coefficients that are equal: " << count << endl;

View File

@@ -3,5 +3,5 @@ m << 1, 0,
1, 1;
cout << "Comparing m with identity matrix:" << endl;
cout << m.cwiseNotEqual(MatrixXi::Identity(2,2)) << endl;
-int count = m.cwiseNotEqual(MatrixXi::Identity(2,2)).count();
+Index count = m.cwiseNotEqual(MatrixXi::Identity(2,2)).count();
cout << "Number of coefficients that are not equal: " << count << endl;

View File

@@ -33,6 +33,9 @@ class AnnoyingScalar
AnnoyingScalar(float _v) { init(); *v = _v; }
AnnoyingScalar(int _v) { init(); *v = _v; }
AnnoyingScalar(long _v) { init(); *v = _v; }
+#if EIGEN_HAS_CXX11
+    AnnoyingScalar(long long _v) { init(); *v = _v; }
+#endif
AnnoyingScalar(const AnnoyingScalar& other) { init(); *v = *(other.v); }
~AnnoyingScalar() {
if(v!=&data)

View File

@@ -8,13 +8,27 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

-// work around "uninitialized" warnings and give that option some testing
-#define EIGEN_INITIALIZE_MATRICES_BY_ZERO
+#if defined(EIGEN_TEST_PART_7)

#ifndef EIGEN_NO_STATIC_ASSERT
#define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them
#endif

+// ignore double-promotion diagnostic for clang and gcc, if we check for static assertion anyway:
+// TODO do the same for MSVC?
+#if defined(__clang__)
+#  if (__clang_major__ * 100 + __clang_minor__) >= 308
+#    pragma clang diagnostic ignored "-Wdouble-promotion"
+#  endif
+#elif defined(__GNUC__)
+// TODO is there a minimal GCC version for this? At least g++-4.7 seems to be fine with this.
+#  pragma GCC diagnostic ignored "-Wdouble-promotion"
+#endif
+
+#endif

#if defined(EIGEN_TEST_PART_1) || defined(EIGEN_TEST_PART_2) || defined(EIGEN_TEST_PART_3)
#ifndef EIGEN_DONT_VECTORIZE
@@ -35,6 +49,28 @@ using namespace std;
VERIFY_IS_APPROX(XPR,REF); \
VERIFY( g_called && #XPR" not properly optimized");

+template<int SizeAtCompileType>
+void raise_assertion(Index size = SizeAtCompileType)
+{
+  // VERIFY_RAISES_ASSERT(mf+md); // does not even compile
+  Matrix<float, SizeAtCompileType, 1> vf; vf.setRandom(size);
+  Matrix<double, SizeAtCompileType, 1> vd; vd.setRandom(size);
+  VERIFY_RAISES_ASSERT(vf=vd);
+  VERIFY_RAISES_ASSERT(vf+=vd);
+  VERIFY_RAISES_ASSERT(vf-=vd);
+  VERIFY_RAISES_ASSERT(vd=vf);
+  VERIFY_RAISES_ASSERT(vd+=vf);
+  VERIFY_RAISES_ASSERT(vd-=vf);
+
+  // vd.asDiagonal() * mf; // does not even compile
+  // vcd.asDiagonal() * mf; // does not even compile
+
+#if 0 // we get other compilation errors here than just static asserts
+  VERIFY_RAISES_ASSERT(vd.dot(vf));
+#endif
+}
+
template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
{
typedef std::complex<float> CF;
@@ -73,13 +109,6 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
while(std::abs(scf)<epsf) scf = internal::random<CF>();
while(std::abs(scd)<epsd) scd = internal::random<CD>();

-  // VERIFY_RAISES_ASSERT(mf+md); // does not even compile
-#ifdef EIGEN_DONT_VECTORIZE
-  VERIFY_RAISES_ASSERT(vf=vd);
-  VERIFY_RAISES_ASSERT(vf+=vd);
-#endif

// check scalar products
VERIFY_MIX_SCALAR(vcf * sf , vcf * complex<float>(sf));
VERIFY_MIX_SCALAR(sd * vcd , complex<double>(sd) * vcd);
@@ -119,9 +148,6 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
// check dot product
vf.dot(vf);
-#if 0 // we get other compilation errors here than just static asserts
-  VERIFY_RAISES_ASSERT(vd.dot(vf));
-#endif
VERIFY_IS_APPROX(vcf.dot(vf), vcf.dot(vf.template cast<complex<float> >()));

// check diagonal product
@@ -130,9 +156,6 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
VERIFY_IS_APPROX(mcf * vf.asDiagonal(), mcf * vf.template cast<complex<float> >().asDiagonal());
VERIFY_IS_APPROX(md * vcd.asDiagonal(), md.template cast<complex<double> >() * vcd.asDiagonal());
-  // vd.asDiagonal() * mf; // does not even compile
-  // vcd.asDiagonal() * mf; // does not even compile

// check inner product
VERIFY_IS_APPROX((vf.transpose() * vcf).value(), (vf.template cast<complex<float> >().transpose() * vcf).value());
@@ -296,5 +319,10 @@ EIGEN_DECLARE_TEST(mixingtypes)
CALL_SUBTEST_4(mixingtypes<3>());
CALL_SUBTEST_5(mixingtypes<4>());
CALL_SUBTEST_6(mixingtypes<Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE)));
+    CALL_SUBTEST_7(raise_assertion<Dynamic>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE)));
}
+  CALL_SUBTEST_7(raise_assertion<0>());
+  CALL_SUBTEST_7(raise_assertion<3>());
+  CALL_SUBTEST_7(raise_assertion<4>());
+  CALL_SUBTEST_7(raise_assertion<Dynamic>(0));
}

View File

@@ -112,7 +112,7 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_, IndexTyp
#if EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes>
-    EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
{
// The number of indices used to access a tensor coefficient must be equal to the rank of the tensor.
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)

View File

@@ -98,7 +98,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions;

-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
static const int NumDims = XprType::NumDims;

enum {

View File

@@ -104,7 +104,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
bool isCopy= false, nByOne = false, oneByN = false;

enum {

@@ -306,7 +306,13 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
if (isCopy) {
+#ifdef EIGEN_GPU_COMPILE_PHASE
+      // See PR 437: on NVIDIA P100 and K20m we observed a x3-4 speed up by enforcing
+      // unaligned loads here. The reason is unclear though.
+      return m_impl.template packet<Unaligned>(index);
+#else
return m_impl.template packet<LoadMode>(index);
+#endif
} else if (oneByN && !nByOne) {
return packetNByOne<LoadMode>(index);
} else if (!oneByN && nByOne) {

@@ -318,7 +324,12 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
} else {
if (isCopy) {
+#ifdef EIGEN_GPU_COMPILE_PHASE
+      // See above.
+      return m_impl.template packet<Unaligned>(index);
+#else
return m_impl.template packet<LoadMode>(index);
+#endif
} else if (oneByN && !nByOne) {
return packetOneByN<LoadMode>(index);
} else if (!oneByN && nByOne) {

View File

@@ -138,7 +138,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

enum {

@@ -417,7 +417,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

enum {
IsAligned = false,

View File

@@ -251,7 +251,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+    const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + packetSize - 1 < dimensions().TotalSize());

@@ -354,7 +354,7 @@ template<typename Axis, typename LeftArgType, typename RightArgType, typename De
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x)
{
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+    const int packetSize = PacketType<CoeffReturnType, Device>::size;
EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize());

View File

@@ -177,9 +177,9 @@ struct NoOpOutputKernel {
*/
template <typename Index, typename Scalar>
EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
-      const TensorContractionParams& params, Index i, Index j, Index num_rows,
-      Index num_cols) const {}
+      const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/,
+      const TensorContractionParams& /*params*/, Index /*i*/,
+      Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {}
};
template<typename Indices, typename LhsXprType, typename RhsXprType, typename OutputKernelType = const NoOpOutputKernel> template<typename Indices, typename LhsXprType, typename RhsXprType, typename OutputKernelType = const NoOpOutputKernel>
@@ -239,7 +239,7 @@ struct TensorContractionEvaluatorBase
enum {
IsAligned = true,
-    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
BlockAccess = false,
Layout = TensorEvaluator<LeftArgType, Device>::Layout,
CoordAccess = false,  // to be implemented
@@ -468,42 +468,58 @@ struct TensorContractionEvaluatorBase
    }
  }

-  EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {
-    if (this->m_lhs_inner_dim_contiguous) {
-      if (this->m_rhs_inner_dim_contiguous) {
-        if (this->m_rhs_inner_dim_reordered) {
-          static_cast<const Derived*>(this)->template evalProduct<true, true, true, Unaligned>(buffer);
-        }
-        else {
-          static_cast<const Derived*>(this)->template evalProduct<true, true, false, Unaligned>(buffer);
-        }
-      }
-      else {
-        if (this->m_rhs_inner_dim_reordered) {
-          static_cast<const Derived*>(this)->template evalProduct<true, false, true, Unaligned>(buffer);
-        }
-        else {
-          static_cast<const Derived*>(this)->template evalProduct<true, false, false, Unaligned>(buffer);
-        }
-      }
-    }
-    else {
-      if (this->m_rhs_inner_dim_contiguous) {
-        if (this->m_rhs_inner_dim_reordered) {
-          static_cast<const Derived*>(this)->template evalProduct<false, true, true, Unaligned>(buffer);
-        }
-        else {
-          static_cast<const Derived*>(this)->template evalProduct<false, true, false, Unaligned>(buffer);
-        }
-      }
-      else {
-        if (this->m_rhs_inner_dim_reordered) {
-          static_cast<const Derived*>(this)->template evalProduct<false, false, true, Unaligned>(buffer);
-        }
-        else {
-          static_cast<const Derived*>(this)->template evalProduct<false, false, false, Unaligned>(buffer);
-        }
-      }
-    }
-  }
+#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \
+    if (this->m_lhs_inner_dim_contiguous) { \
+      if (this->m_rhs_inner_dim_contiguous) { \
+        if (this->m_rhs_inner_dim_reordered) { \
+          METHOD<true, true, true, ALIGNMENT>ARGS; \
+        } \
+        else { \
+          METHOD<true, true, false, ALIGNMENT>ARGS; \
+        } \
+      } \
+      else { \
+        if (this->m_rhs_inner_dim_reordered) { \
+          METHOD<true, false, true, ALIGNMENT>ARGS; \
+        } \
+        else { \
+          METHOD<true, false, false, ALIGNMENT>ARGS; \
+        } \
+      } \
+    } \
+    else { \
+      if (this->m_rhs_inner_dim_contiguous) { \
+        if (this->m_rhs_inner_dim_reordered) { \
+          METHOD<false, true, true, ALIGNMENT>ARGS; \
+        } \
+        else { \
+          METHOD<false, true, false, ALIGNMENT>ARGS; \
+        } \
+      } \
+      else { \
+        if (this->m_rhs_inner_dim_reordered) { \
+          METHOD<false, false, true, ALIGNMENT>ARGS; \
+        } \
+        else { \
+          METHOD<false, false, false, ALIGNMENT>ARGS; \
+        } \
+      } \
+    }
+
+  EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {
+    static_cast<const Derived*>(this)->template evalProduct<Unaligned>(buffer);
+  }
+
+  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
+            bool rhs_inner_dim_reordered, int Alignment>
+  void evalProductSequential(Scalar* buffer) const {
+    if (this->m_j_size == 1) {
+      this->template evalGemv<lhs_inner_dim_contiguous,
+                              rhs_inner_dim_contiguous, rhs_inner_dim_reordered,
+                              Alignment>(buffer);
+    } else {
+      this->template evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous,
+                              rhs_inner_dim_reordered, Alignment>(buffer);
+    }
+  }
@@ -624,7 +640,7 @@ struct TensorContractionEvaluatorBase
OutputMapper output(buffer, m);

// Sizes of the blocks to load in cache. See the Goto paper for details.
-    internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1);
+    internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index, internal::ShardByCol> blocking(k, m, n, 1);
const Index kc = blocking.kc();
const Index mc = numext::mini(m, blocking.mc());
const Index nc = numext::mini(n, blocking.nc());
@ -977,14 +993,9 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :
Base(op, device) { } Base(op, device) { }
template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> template <int Alignment>
EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const { void evalProduct(Scalar* buffer) const {
if (this->m_j_size == 1) { TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential, Alignment, (buffer));
this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
return;
}
this->template evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
} }
}; };
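Aside: the dispatch above is the standard trick for folding runtime layout flags into template parameters, so each of the eight contraction kernels is compiled with compile-time-constant flags. A minimal, self-contained sketch of the same pattern (all names here are hypothetical, not Eigen API):

    #include <cstdio>

    // Inside the kernel the flags are compile-time constants, so dead
    // branches vanish and loads can be specialized per memory layout.
    template <bool LhsContiguous, bool RhsContiguous>
    void kernel() {
      std::printf("lhs=%d rhs=%d\n", int(LhsContiguous), int(RhsContiguous));
    }

    // Exhaustive branching maps the runtime bools onto template parameters.
    void dispatch(bool lhs_contiguous, bool rhs_contiguous) {
      if (lhs_contiguous) {
        if (rhs_contiguous) kernel<true, true>();
        else                kernel<true, false>();
      } else {
        if (rhs_contiguous) kernel<false, true>();
        else                kernel<false, false>();
      }
    }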
@@ -21,13 +21,10 @@ enum {

 // Default Blocking Strategy
-template <typename LhsMapper, typename RhsMapper, typename Index, int ShardingType=ShardByCol>
+template <typename LhsScalar, typename RhsScalar, typename Index, int ShardingType=ShardByCol>
 class TensorContractionBlocking {
  public:
-  typedef typename LhsMapper::Scalar LhsScalar;
-  typedef typename RhsMapper::Scalar RhsScalar;

 /*
   adding EIGEN_DEVICE_FUNC unconditionally to 'TensorContractionBlocking' constructor in `TensorContractionBlocking.h`
   requires adding EIGEN_DEVICE_FUNC to `computeProductBlockingSizes` in `GeneralBlockPanelKernel.h`
@@ -41,7 +38,7 @@ class TensorContractionBlocking {
   ../Eigen/src/Core/products/GeneralBlockPanelKernel.h(57): error #2901:
      dynamic initialization is not supported for function-scope static variables within a __device__/__global__ function
 */
 #if !defined(EIGEN_HIPCC)
   EIGEN_DEVICE_FUNC
 #endif
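Dropping the mapper typedefs works because the blocking heuristic only ever needed the two scalar types; it forwards to the GEBP sizing helper. A hedged usage sketch (the KcFactor of 1 and the exact template arguments are assumptions for illustration):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Index k = 256, m = 1024, n = 512;
      // Sketch: on return k/m/n hold cache-aware kc/mc/nc tile sizes,
      // derived from the scalar types alone (internal API, may change).
      Eigen::internal::computeProductBlockingSizes<float, float, 1>(
          k, m, n, /*num_threads=*/1);
    }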
@@ -71,8 +71,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
   TensorEvaluator(const XprType& op, const Device& device) :
       Base(op, device) {}

-  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
-            bool rhs_inner_dim_reordered, int Alignment>
+  template <int Alignment>
   void evalProduct(Scalar* buffer) const {
     const Index m = this->m_i_size;
     const Index n = this->m_j_size;
@@ -96,39 +95,6 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
     }
 #endif

-    typedef
-        typename internal::remove_const<typename EvalLeftArgType::Scalar>::type
-            LhsScalar;
-    typedef
-        typename internal::remove_const<typename EvalRightArgType::Scalar>::type
-            RhsScalar;
-    typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;
-    typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;
-    typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;
-    typedef internal::TensorContractionInputMapper<
-        LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t,
-        contract_t, internal::packet_traits<LhsScalar>::size,
-        lhs_inner_dim_contiguous, false, Unaligned>
-        LhsMapper;
-    typedef internal::TensorContractionInputMapper<
-        RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t,
-        contract_t, internal::packet_traits<RhsScalar>::size,
-        rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned>
-        RhsMapper;
-    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
-    typedef internal::gemm_pack_lhs<LhsScalar, Index,
-                                    typename LhsMapper::SubMapper, Traits::mr,
-                                    Traits::LhsProgress, ColMajor>
-        LhsPacker;
-    typedef internal::gemm_pack_rhs<
-        RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor>
-        RhsPacker;
-    typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
-                                  Traits::mr, Traits::nr, false, false>
-        GebpKernel;
-
     // Compute a set of algorithm parameters:
     // - kernel block sizes (bm, bn, bk)
     // - task grain sizes (number of kernels executed per task: gm, gn)
@@ -158,14 +124,14 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
     // Again, we don't know number of threads yet, so we use 2.
     Index bm, bn, bk;
     if (shard_by_col) {
-      internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
+      internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index,
                                           internal::ShardByCol>
           blocking(k, m, n, 2);
       bm = blocking.mc();
       bn = blocking.nc();
       bk = blocking.kc();
     } else {
-      internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
+      internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index,
                                           internal::ShardByRow>
           blocking(k, m, n, 2);
       bm = blocking.mc();
@@ -187,29 +153,22 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
     if (n == 1) num_threads = 1;

     if (num_threads == 1) {
-      // The single-threaded algorithm should be faster in this case.
-      if (n == 1)
-        this->template evalGemv<lhs_inner_dim_contiguous,
-                                rhs_inner_dim_contiguous,
-                                rhs_inner_dim_reordered, Alignment>(buffer);
-      else
-        this->template evalGemm<lhs_inner_dim_contiguous,
-                                rhs_inner_dim_contiguous,
-                                rhs_inner_dim_reordered, Alignment>(buffer);
+      TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential,
+                                  Unaligned, (buffer));
       return;
     }

     // Now that we know number of threads, recalculate sharding and blocking.
     shard_by_col = shardByCol(m, n, num_threads);
     if (shard_by_col) {
-      internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
+      internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index,
                                           internal::ShardByCol>
          blocking(k, m, n, num_threads);
       bm = blocking.mc();
       bn = blocking.nc();
       bk = blocking.kc();
     } else {
-      internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index,
+      internal::TensorContractionBlocking<LhsScalar, RhsScalar, Index,
                                           internal::ShardByRow>
          blocking(k, m, n, num_threads);
       bm = blocking.mc();
@@ -257,34 +216,55 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
     // more important in this case.
     if ((shard_by_col ? nm : nn) == 1) parallel_pack = false;

-    LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides,
-                  this->m_i_strides, this->m_left_contracting_strides,
-                  this->m_k_strides);
-
-    RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides,
-                  this->m_j_strides, this->m_right_contracting_strides,
-                  this->m_k_strides);
-
-    Context<LhsPacker, RhsPacker, GebpKernel, LhsMapper, RhsMapper,
-            OutputMapper>(this, num_threads, lhs, rhs, buffer, m, n,
-                          k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0,
-                          shard_by_col, parallel_pack)
-        .run();
+#define CONTEXT_ARGS                                                        \
+  (this, num_threads, buffer, m, n, k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, \
+   nn0, shard_by_col, parallel_pack)                                        \
+      .run()
+
+    TENSOR_CONTRACTION_DISPATCH(Context, Alignment, CONTEXT_ARGS);
+
+#undef CONTEXT_ARGS
   }

   // Context coordinates a single parallel gemm operation.
-  template <typename LhsPacker, typename RhsPacker, typename GebpKernel,
-            typename LhsMapper, typename RhsMapper, typename OutputMapper>
+  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
+            bool rhs_inner_dim_reordered, int Alignment>
   class Context {
    public:
-    Context(const Self* self, int num_threads, LhsMapper& lhs,
-            RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm,
-            Index bn, Index bk, Index nm, Index nn, Index nk, Index gm,
-            Index gn, Index nm0, Index nn0, bool shard_by_col,
+    typedef internal::TensorContractionInputMapper<
+        LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t,
+        contract_t, internal::packet_traits<LhsScalar>::size,
+        lhs_inner_dim_contiguous, false, Unaligned>
+        LhsMapper;
+    typedef internal::TensorContractionInputMapper<
+        RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t,
+        contract_t, internal::packet_traits<RhsScalar>::size,
+        rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned>
+        RhsMapper;
+    typedef internal::gemm_pack_lhs<LhsScalar, Index,
+                                    typename LhsMapper::SubMapper, Traits::mr,
+                                    Traits::LhsProgress, ColMajor>
+        LhsPacker;
+    typedef internal::gemm_pack_rhs<
+        RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor>
+        RhsPacker;
+    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;
+    typedef internal::gebp_kernel<LhsScalar, RhsScalar, Index, OutputMapper,
+                                  Traits::mr, Traits::nr, false, false>
+        GebpKernel;
+
+    Context(const Self* self, int num_threads, Scalar* buffer, Index tm, Index tn,
+            Index tk, Index bm, Index bn, Index bk, Index nm, Index nn, Index nk,
+            Index gm, Index gn, Index nm0, Index nn0, bool shard_by_col,
             bool parallel_pack)
         : device_(self->m_device),
-          lhs_(lhs),
-          rhs_(rhs),
+          lhs_(self->m_leftImpl, self->m_left_nocontract_strides,
+               self->m_i_strides, self->m_left_contracting_strides,
+               self->m_k_strides),
+          rhs_(self->m_rightImpl, self->m_right_nocontract_strides,
+               self->m_j_strides, self->m_right_contracting_strides,
+               self->m_k_strides),
          buffer_(buffer),
          output_(buffer, tm),
          output_kernel_(self->m_output_kernel),
@@ -337,7 +317,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
           divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align;
       size_t rhs_size =
           divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align;
-      packed_mem_ = static_cast<char*>(internal::aligned_malloc(
+      packed_mem_ = static_cast<char*>(device_.allocate(
          (nm0_ * lhs_size + nn0_ * rhs_size) * std::min<size_t>(nk_, P - 1)));
       char* mem = static_cast<char*>(packed_mem_);
       for (Index x = 0; x < numext::mini<Index>(nk_, P - 1); x++) {
@@ -359,7 +339,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
         for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m];
         delete[] state_kernel_[x];
       }
-      internal::aligned_free(packed_mem_);
+      device_.deallocate(packed_mem_);
     }

     void run() {
@@ -376,8 +356,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
    private:
     Notification done_;
     const Device& device_;
-    LhsMapper& lhs_;
-    RhsMapper& rhs_;
+    LhsMapper lhs_;
+    RhsMapper rhs_;
     Scalar* const buffer_;
     OutputMapper output_;
     OutputKernelType output_kernel_;
@@ -190,7 +190,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
   typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   typedef typename PacketType<SrcType, Device>::type PacketSourceType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = false,
@@ -302,7 +302,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
@@ -87,11 +87,11 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
   typedef typename internal::remove_const<typename ArgType::Scalar>::type Scalar;
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = false,
     Layout = TensorEvaluator<XprType, Device>::Layout,
     CoordAccess = false,  // to be implemented
@@ -112,7 +112,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
       return false;
     } else {
       m_result = static_cast<CoeffReturnType*>(
-          m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
+          m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
       evalTo(m_result);
       return true;
     }
@@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     if (m_result != NULL) {
-      m_device.deallocate(m_result);
+      m_device.deallocate_temp(m_result);
       m_result = NULL;
     }
   }
@@ -249,11 +249,11 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   typedef typename XprType::Scalar Scalar;
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = false,
     Layout = TensorEvaluator<LhsXprType, Device>::Layout,
     CoordAccess = false,  // to be implemented
@@ -273,7 +273,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
+      m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
       evalTo(m_result);
       return true;
     }
@@ -281,7 +281,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     if (m_result != NULL) {
-      m_device.deallocate(m_result);
+      m_device.deallocate_temp(m_result);
       m_result = NULL;
     }
   }
@@ -20,6 +20,12 @@ struct DefaultDevice {
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
     internal::aligned_free(buffer);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
+    return allocate(num_bytes);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
+    deallocate(buffer);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
     ::memcpy(dst, src, n);
@@ -207,6 +207,15 @@ struct GpuDevice {
     stream_->deallocate(buffer);
   }

+  EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
+    return stream_->allocate(num_bytes);
+  }
+
+  EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
+    stream_->deallocate(buffer);
+  }
+
   EIGEN_STRONG_INLINE void* scratchpad() const {
     return stream_->scratchpad();
   }
@@ -105,6 +105,14 @@ struct ThreadPoolDevice {
     internal::aligned_free(buffer);
   }

+  EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
+    return allocate(num_bytes);
+  }
+  EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const {
+    deallocate(buffer);
+  }
+
   EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
     ::memcpy(dst, src, n);
   }
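All three devices now pair allocate_temp/deallocate_temp with their regular allocators, which gives evaluators a single hook for short-lived scratch memory (and leaves room for a device to substitute a pooled allocator later without touching the evaluators). A hypothetical RAII helper, sketched here only to illustrate the intended usage pattern; it is not part of Eigen:

    // Hypothetical helper: scoped ownership of a device temp buffer.
    template <typename Device>
    class ScopedTempBuffer {
     public:
      ScopedTempBuffer(const Device& device, size_t bytes)
          : device_(device), ptr_(device.allocate_temp(bytes)) {}
      ~ScopedTempBuffer() { device_.deallocate_temp(ptr_); }
      void* get() const { return ptr_; }
     private:
      ScopedTempBuffer(const ScopedTempBuffer&);  // non-copyable
      const Device& device_;
      void* const ptr_;
    };

    // Usage: Eigen::DefaultDevice dev;
    //        ScopedTempBuffer<Eigen::DefaultDevice> scratch(dev, 256 * sizeof(float));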
@@ -41,7 +41,7 @@ template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor>
 struct fixed_size_tensor_index_linearization_helper
 {
   template <typename Dimensions> EIGEN_DEVICE_FUNC
-  static inline Index run(array<Index, NumIndices> const& indices,
+  static EIGEN_STRONG_INLINE Index run(array<Index, NumIndices> const& indices,
                           const Dimensions& dimensions)
   {
     return array_get<RowMajor ? n - 1 : (NumIndices - n)>(indices) +
@@ -54,7 +54,7 @@ template<typename Index, std::size_t NumIndices, bool RowMajor>
 struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
 {
   template <typename Dimensions> EIGEN_DEVICE_FUNC
-  static inline Index run(array<Index, NumIndices> const&, const Dimensions&)
+  static EIGEN_STRONG_INLINE Index run(array<Index, NumIndices> const&, const Dimensions&)
   {
     return 0;
   }
@@ -64,7 +64,7 @@ template<typename Index, std::size_t n>
 struct fixed_size_tensor_index_extraction_helper
 {
   template <typename Dimensions> EIGEN_DEVICE_FUNC
-  static inline Index run(const Index index,
+  static EIGEN_STRONG_INLINE Index run(const Index index,
                           const Dimensions& dimensions)
   {
     const Index mult = (index == n-1) ? 1 : 0;
@@ -77,7 +77,7 @@ template<typename Index>
 struct fixed_size_tensor_index_extraction_helper<Index, 0>
 {
   template <typename Dimensions> EIGEN_DEVICE_FUNC
-  static inline Index run(const Index,
+  static EIGEN_STRONG_INLINE Index run(const Index,
                           const Dimensions&)
   {
     return 0;
@@ -421,20 +421,20 @@ template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::si
 template <typename Dims1, typename Dims2, size_t n, size_t m>
 struct sizes_match_below_dim {
-  static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) {
     return false;
   }
 };
 template <typename Dims1, typename Dims2, size_t n>
 struct sizes_match_below_dim<Dims1, Dims2, n, n> {
-  static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) {
     return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &
            sizes_match_below_dim<Dims1, Dims2, n-1, n-1>::run(dims1, dims2);
   }
 };
 template <typename Dims1, typename Dims2>
 struct sizes_match_below_dim<Dims1, Dims2, 0, 0> {
-  static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) {
     return true;
   }
 };
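The inline -> EIGEN_STRONG_INLINE churn in this file (and in several files below) only strengthens the inlining hint; roughly, the macro is defined as follows in Eigen/src/Core/util/Macros.h (paraphrased, so treat the exact condition as an approximation):

    // A stronger hint than `inline`, but only where the compiler offers one;
    // elsewhere it degrades to plain `inline`.
    #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
    #define EIGEN_STRONG_INLINE __forceinline
    #else
    #define EIGEN_STRONG_INLINE inline
    #endif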
@@ -102,7 +102,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
   typedef typename XprType::Index Index;
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
@@ -33,6 +33,7 @@ struct TensorEvaluator
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   typedef typename Derived::Dimensions Dimensions;
   typedef Derived XprType;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   // NumDimensions is -1 for variable dim tensors
   static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
@@ -40,7 +41,7 @@ struct TensorEvaluator
   enum {
     IsAligned = Derived::IsAligned,
-    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
     Layout = Derived::Layout,
     CoordAccess = NumCoords > 0,
@@ -121,7 +122,7 @@ struct TensorEvaluator
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
-                        internal::unpacket_traits<PacketReturnType>::size);
+                        PacketType<CoeffReturnType, Device>::size);
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
@@ -188,10 +189,11 @@ struct TensorEvaluator<const Derived, Device>
   // NumDimensions is -1 for variable dim tensors
   static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                                internal::traits<Derived>::NumDimensions : 0;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = Derived::IsAligned,
-    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = internal::is_arithmetic<typename internal::remove_const<Scalar>::type>::value,
     Layout = Derived::Layout,
     CoordAccess = NumCoords > 0,
@@ -249,7 +251,7 @@ struct TensorEvaluator<const Derived, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
-                        internal::unpacket_traits<PacketReturnType>::size);
+                        PacketType<CoeffReturnType, Device>::size);
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
@@ -300,7 +302,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;

   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
@@ -322,7 +324,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
   costPerCoeff(bool vectorized) const {
     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
-                        internal::unpacket_traits<PacketReturnType>::size);
+                        PacketType<CoeffReturnType, Device>::size);
   }

   EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
@@ -367,7 +369,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;

   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
@@ -445,7 +447,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
   typedef typename XprType::Scalar Scalar;
   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;

   static const int NumDims = internal::array_size<
@@ -574,7 +576,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
   typedef typename XprType::Scalar Scalar;
   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;

   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
@@ -644,7 +646,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
   enum {
     IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned & TensorEvaluator<ElseArgType, Device>::IsAligned,
     PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess & TensorEvaluator<ElseArgType, Device>::PacketAccess &
-                   internal::packet_traits<Scalar>::HasBlend,
+                   PacketType<Scalar, Device>::HasBlend,
     BlockAccess = false,
     Layout = TensorEvaluator<IfArgType, Device>::Layout,
     CoordAccess = false,  // to be implemented
@@ -665,7 +667,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
   typedef typename XprType::Index Index;
   typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;

   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
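The repeated substitution of internal::unpacket_traits<PacketReturnType>::size with PacketType<CoeffReturnType, Device>::size in these hunks matters because the packet type, and therefore the packet width, can be chosen per device rather than fixed by the host's SIMD packet. A simplified, self-contained sketch of a device-keyed trait (all types here are hypothetical stand-ins, not Eigen internals):

    struct DefaultDeviceTag {};
    struct GpuDeviceTag {};
    struct Packet4f   { float v[4]; };  // stand-in for an SSE/NEON packet
    struct GpuPacket2f { float v[2]; }; // stand-in for a GPU-specific packet

    // Fallback: scalar "packet" of size 1.
    template <typename Scalar, typename Device>
    struct PacketTypeSketch { typedef Scalar type; static const int size = 1; };

    // The same scalar can map to different packets on different devices.
    template <> struct PacketTypeSketch<float, DefaultDeviceTag> {
      typedef Packet4f type; static const int size = 4;
    };
    template <> struct PacketTypeSketch<float, GpuDeviceTag> {
      typedef GpuPacket2f type; static const int size = 2;
    };

    static_assert(PacketTypeSketch<float, GpuDeviceTag>::size !=
                      PacketTypeSketch<float, DefaultDeviceTag>::size,
                  "packet width is a per-device property");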
@@ -39,7 +39,7 @@ class TensorExecutor {
   using StorageIndex = typename Expression::Index;

   EIGEN_DEVICE_FUNC
-  static inline void run(const Expression& expr,
+  static EIGEN_STRONG_INLINE void run(const Expression& expr,
                          const Device& device = Device()) {
     TensorEvaluator<Expression, Device> evaluator(expr, device);
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
@@ -63,7 +63,7 @@ class TensorExecutor<Expression, DefaultDevice, /*Vectorizable*/ true,
   using StorageIndex = typename Expression::Index;

   EIGEN_DEVICE_FUNC
-  static inline void run(const Expression& expr,
+  static EIGEN_STRONG_INLINE void run(const Expression& expr,
                          const DefaultDevice& device = DefaultDevice()) {
     TensorEvaluator<Expression, DefaultDevice> evaluator(expr, device);
     const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
@@ -111,7 +111,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
   static const int NumDims = traits<Expression>::NumDimensions;

   EIGEN_DEVICE_FUNC
-  static inline void run(const Expression& expr,
+  static EIGEN_STRONG_INLINE void run(const Expression& expr,
                          const DefaultDevice& device = DefaultDevice()) {
     using TensorBlock =
         TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout>;
@@ -223,7 +223,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
  public:
   using StorageIndex = typename Expression::Index;

-  static inline void run(const Expression& expr,
+  static EIGEN_STRONG_INLINE void run(const Expression& expr,
                          const ThreadPoolDevice& device) {
     typedef TensorEvaluator<Expression, ThreadPoolDevice> Evaluator;
     typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange;
@@ -257,7 +257,7 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
   static const int NumDims = traits<Expression>::NumDimensions;

-  static inline void run(const Expression& expr,
+  static EIGEN_STRONG_INLINE void run(const Expression& expr,
                          const ThreadPoolDevice& device) {
     using TensorBlock =
         TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout>;
@@ -376,7 +376,7 @@ EigenMetaKernel(Evaluator eval, StorageIndex size) {
 /*static*/
 template <typename Expression, bool Vectorizable, bool Tileable>
-inline void TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable>::run(
+EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable>::run(
     const Expression& expr, const GpuDevice& device) {
   TensorEvaluator<Expression, GpuDevice> evaluator(expr, device);
   const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
@@ -405,7 +405,7 @@ inline void TensorExecutor<Expression, GpuDevice, Vectorizable, Tileable>::run(
 template <typename Expression, bool Vectorizable>
 class TensorExecutor<Expression, SyclDevice, Vectorizable> {
  public:
-  static inline void run(const Expression &expr, const SyclDevice &device) {
+  static EIGEN_STRONG_INLINE void run(const Expression &expr, const SyclDevice &device) {
     // call TensorSYCL module
     TensorSycl::run(expr, device);
   }
@@ -93,11 +93,11 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = true,
-    PacketAccess = (PacketSize > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     RawAccess = true
@@ -115,7 +115,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
 #endif
   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
     const Index numValues = internal::array_prod(m_impl.dimensions());
-    m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType));
+    m_buffer = (CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType));
     // Should initialize the memory in case we're dealing with non POD types.
     if (NumTraits<CoeffReturnType>::RequireInitialization) {
       for (Index i = 0; i < numValues; ++i) {
@@ -129,7 +129,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     return true;
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-    m_device.deallocate(m_buffer);
+    m_device.deallocate_temp(m_buffer);
     m_buffer = NULL;
   }
@@ -20,7 +20,7 @@ namespace internal {
 template <typename Scalar>
 struct scalar_mod_op {
   EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {}
-  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a % m_divisor; }
   const Scalar m_divisor;
 };
 template <typename Scalar>
@@ -34,7 +34,7 @@ struct functor_traits<scalar_mod_op<Scalar> >
 template <typename Scalar>
 struct scalar_mod2_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op)
-  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
 };
 template <typename Scalar>
 struct functor_traits<scalar_mod2_op<Scalar> >
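These functors back the tensor modulo expressions; a quick hedged usage sketch (whether `%` routes through scalar_mod_op exactly as shown is an assumption, but unaryExpr with a custom functor is standard TensorBase API):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<int, 1> t(4);
      t.setValues({3, 5, 7, 9});
      // Apply the modulo functor element-wise via unaryExpr.
      Eigen::Tensor<int, 1> r =
          t.unaryExpr(Eigen::internal::scalar_mod_op<int>(4));
      std::cout << r << std::endl;  // 3 1 3 1
    }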
@@ -90,7 +90,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   enum {
     IsAligned = false,
-    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false,  // to be implemented
@@ -137,7 +137,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
   template<int LoadMode>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
   {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+    const int packetSize = PacketType<CoeffReturnType, Device>::size;
     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
     eigen_assert(index+packetSize-1 < dimensions().TotalSize());
@@ -241,7 +241,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
   typedef TensorEvaluator<ArgType, Device> Impl;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = false,
@@ -75,10 +75,10 @@ template<DenseIndex n> struct NumTraits<type2index<n> >
     MulCost = 1
   };

-  EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; }
-  EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; }
-  EIGEN_DEVICE_FUNC static inline Real highest() { return n; }
-  EIGEN_DEVICE_FUNC static inline Real lowest() { return n; }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real epsilon() { return 0; }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real dummy_precision() { return 0; }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real highest() { return n; }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Real lowest() { return n; }
 };

 namespace internal {
@@ -85,7 +85,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
@@ -150,6 +150,7 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
     EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const
     {
       EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
+      eigen_assert(internal::all((Eigen::NumTraits<Index>::highest() >= otherIndices)...));
       if (PlainObjectType::Options&RowMajor) {
         const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumIndices>{{firstIndex, secondIndex, otherIndices...}});
         return m_data[index];
@@ -237,6 +238,7 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
     EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices)
     {
       static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
+      eigen_assert(internal::all((Eigen::NumTraits<Index>::highest() >= otherIndices)...));
       const std::size_t NumDims = sizeof...(otherIndices) + 2;
       if (PlainObjectType::Options&RowMajor) {
         const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumDims>{{firstIndex, secondIndex, otherIndices...}});
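The new eigen_assert guards expand a parameter pack of per-index range checks through a variadic all(). A self-contained C++11 sketch of such a helper (Eigen ships its own internal::all in its meta utilities; this stand-in only illustrates the mechanism):

    // True iff every argument is true; the pack recursion mirrors how
    // (NumTraits<Index>::highest() >= otherIndices)... is folded above.
    inline bool all_of() { return true; }

    template <typename... Bools>
    inline bool all_of(bool first, Bools... rest) {
      return first && all_of(rest...);
    }

    // e.g. all_of(i >= 0, j >= 0, k >= 0) checks a whole index pack at once.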
@@ -617,7 +617,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
   template<int LoadMode>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
   {
-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+    const int packetSize = PacketType<CoeffReturnType, Device>::size;
     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
     eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
@@ -814,7 +814,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
       return;
     }

-    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+    const int packetSize = PacketType<CoeffReturnType, Device>::size;
     Index inputIndices[] = {0, 0};
     Index indices[] = {index, index + packetSize - 1};
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -91,7 +91,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = true,
@@ -88,7 +88,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
@@ -472,7 +472,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
   static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = false,
@@ -596,7 +596,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
                  !RunningOnGPU))) {
       bool need_assign = false;
       if (!data) {
-        m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType)));
+        m_result = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType)));
         data = m_result;
         need_assign = true;
       }
@@ -608,7 +608,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
       const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
       const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
       if (!data) {
-        data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+        data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
         m_result = data;
       }
       Op reducer(m_reducer);
@@ -632,7 +632,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
       const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
       if (!data) {
         if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
-          data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+          data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
           m_result = data;
         }
         else {
@@ -642,7 +642,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
       Op reducer(m_reducer);
       if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
         if (m_result) {
-          m_device.deallocate(m_result);
+          m_device.deallocate_temp(m_result);
           m_result = NULL;
         }
         return true;
@@ -665,7 +665,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
       const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
       if (!data) {
         if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
-          data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+          data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
          m_result = data;
         }
         else {
@@ -675,7 +675,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
       Op reducer(m_reducer);
       if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
         if (m_result) {
-          m_device.deallocate(m_result);
+          m_device.deallocate_temp(m_result);
          m_result = NULL;
         }
         return true;
@@ -690,7 +690,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
     m_impl.cleanup();
     if (m_result) {
-      m_device.deallocate(m_result);
+      m_device.deallocate_temp(m_result);
       m_result = NULL;
     }
   }
@@ -108,7 +108,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = false,
@@ -266,7 +266,7 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   const Dimensions& dimensions() const { return this->m_dimensions; }
@@ -95,7 +95,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
   enum {
     IsAligned = false,
-    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false,
@@ -108,11 +108,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false,  // to be implemented
@@ -405,11 +405,11 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

   enum {
     IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
     BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     RawAccess = false


@@ -107,7 +107,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   enum {
     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
@@ -287,7 +287,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
   {


@@ -194,7 +194,7 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
   typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
-  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   enum {
     IsAligned = false,
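All of these evaluator hunks converge on one pattern: PacketSize and PacketAccess are now both derived from the device-aware PacketType trait rather than the host-only packet_traits/unpacket_traits, so a device specialization (for instance a GPU half-precision packet) cannot leave the two out of sync. A small probe, assuming PacketType lives in the Eigen namespace as in TensorMeta.h:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      // On the host this matches packet_traits<float>; device specializations
      // may differ, which is exactly why the evaluators now read the size
      // from PacketType instead of unpacket_traits.
      std::cout << Eigen::PacketType<float, Eigen::DefaultDevice>::size << "\n";
      return 0;
    }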


@@ -104,9 +104,9 @@ template<> struct h_skip_helper_type<0>
 template<int n>
 struct h_skip {
   template<typename T, T... ii>
-  constexpr static inline typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); }
+  constexpr static EIGEN_STRONG_INLINE typename h_skip_helper_numeric<T, n, ii...>::type helper(numeric_list<T, ii...>) { return typename h_skip_helper_numeric<T, n, ii...>::type(); }
   template<typename... tt>
-  constexpr static inline typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); }
+  constexpr static EIGEN_STRONG_INLINE typename h_skip_helper_type<n, tt...>::type helper(type_list<tt...>) { return typename h_skip_helper_type<n, tt...>::type(); }
 };
 template<int n, typename a> struct skip { typedef decltype(h_skip<n>::helper(a())) type; };
@@ -268,7 +268,7 @@ template<
   typename Reducer
 > struct reduce<Reducer>
 {
-  EIGEN_DEVICE_FUNC constexpr static inline int run() { return Reducer::Identity; }
+  EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE int run() { return Reducer::Identity; }
 };
 template<
@@ -276,7 +276,7 @@
   typename A
 > struct reduce<Reducer, A>
 {
-  EIGEN_DEVICE_FUNC constexpr static inline A run(A a) { return a; }
+  EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE A run(A a) { return a; }
 };
 template<
@@ -285,7 +285,7 @@
   typename... Ts
 > struct reduce<Reducer, A, Ts...>
 {
-  EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, Ts...>::run(ts...))) {
+  EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, Ts... ts) -> decltype(Reducer::run(a, reduce<Reducer, Ts...>::run(ts...))) {
     return Reducer::run(a, reduce<Reducer, Ts...>::run(ts...));
   }
 };
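reduce<Reducer, Ts...> is a compile-time fold: the one-argument specialization returns its argument, and each further argument is combined with Reducer::run, so the whole chain collapses into a single constexpr expression. A usage sketch, assuming these helpers live in Eigen::internal as in CXX11Meta.h:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      using namespace Eigen::internal;
      // Folds 1, 2, 3 with sum_op::run at compile time.
      constexpr int s = reduce<sum_op, int, int, int>::run(1, 2, 3);
      static_assert(s == 6, "constexpr fold");
      return s == 6 ? 0 : 1;
    }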
@@ -293,29 +293,29 @@ template<
 /* generic binary operations */
 struct sum_op {
-  template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a + b) { return a + b; }
+  template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a + b) { return a + b; }
   static constexpr int Identity = 0;
 };
 struct product_op {
-  template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static inline auto run(A a, B b) -> decltype(a * b) { return a * b; }
+  template<typename A, typename B> EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a * b) { return a * b; }
   static constexpr int Identity = 1;
 };
-struct logical_and_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a && b) { return a && b; } };
+struct logical_and_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a && b) { return a && b; } };
-struct logical_or_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a || b) { return a || b; } };
+struct logical_or_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a || b) { return a || b; } };
-struct equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a == b) { return a == b; } };
+struct equal_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a == b) { return a == b; } };
-struct not_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a != b) { return a != b; } };
+struct not_equal_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a != b) { return a != b; } };
-struct lesser_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a < b) { return a < b; } };
+struct lesser_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a < b) { return a < b; } };
-struct lesser_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a <= b) { return a <= b; } };
+struct lesser_equal_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a <= b) { return a <= b; } };
-struct greater_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a > b) { return a > b; } };
+struct greater_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a > b) { return a > b; } };
-struct greater_equal_op { template<typename A, typename B> constexpr static inline auto run(A a, B b) -> decltype(a >= b) { return a >= b; } };
+struct greater_equal_op { template<typename A, typename B> constexpr static EIGEN_STRONG_INLINE auto run(A a, B b) -> decltype(a >= b) { return a >= b; } };
 /* generic unary operations */
-struct not_op { template<typename A> constexpr static inline auto run(A a) -> decltype(!a) { return !a; } };
+struct not_op { template<typename A> constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(!a) { return !a; } };
-struct negation_op { template<typename A> constexpr static inline auto run(A a) -> decltype(-a) { return -a; } };
+struct negation_op { template<typename A> constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(-a) { return -a; } };
-struct greater_equal_zero_op { template<typename A> constexpr static inline auto run(A a) -> decltype(a >= 0) { return a >= 0; } };
+struct greater_equal_zero_op { template<typename A> constexpr static EIGEN_STRONG_INLINE auto run(A a) -> decltype(a >= 0) { return a >= 0; } };
 /* reductions for lists */
@@ -324,13 +324,13 @@ struct greater_equal_zero_op { template<typename A> constexpr static inline auto
 // together in front... (13.0 doesn't work with array_prod/array_reduce/... anyway, but 13.1
 // does...
 template<typename... Ts>
-EIGEN_DEVICE_FUNC constexpr inline decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts)
+EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE decltype(reduce<product_op, Ts...>::run((*((Ts*)0))...)) arg_prod(Ts... ts)
 {
   return reduce<product_op, Ts...>::run(ts...);
 }
 template<typename... Ts>
-constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts)
+constexpr EIGEN_STRONG_INLINE decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts... ts)
 {
   return reduce<sum_op, Ts...>::run(ts...);
 }
@@ -338,13 +338,13 @@ constexpr inline decltype(reduce<sum_op, Ts...>::run((*((Ts*)0))...)) arg_sum(Ts
 /* reverse arrays */
 template<typename Array, int... n>
-constexpr inline Array h_array_reverse(Array arr, numeric_list<int, n...>)
+constexpr EIGEN_STRONG_INLINE Array h_array_reverse(Array arr, numeric_list<int, n...>)
 {
   return {{array_get<sizeof...(n) - n - 1>(arr)...}};
 }
 template<typename T, std::size_t N>
-constexpr inline array<T, N> array_reverse(array<T, N> arr)
+constexpr EIGEN_STRONG_INLINE array<T, N> array_reverse(array<T, N> arr)
 {
   return h_array_reverse(arr, typename gen_numeric_list<int, N>::type());
 }
@@ -359,7 +359,7 @@ constexpr inline array<T, N> array_reverse(array<T, N> arr)
 // an infinite loop)
 template<typename Reducer, typename T, std::size_t N, std::size_t n = N - 1>
 struct h_array_reduce {
-  EIGEN_DEVICE_FUNC constexpr static inline auto run(array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr)))
+  EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE auto run(array<T, N> arr, T identity) -> decltype(Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr)))
   {
     return Reducer::run(h_array_reduce<Reducer, T, N, n - 1>::run(arr, identity), array_get<n>(arr));
   }
@@ -368,7 +368,7 @@ struct h_array_reduce {
 template<typename Reducer, typename T, std::size_t N>
 struct h_array_reduce<Reducer, T, N, 0>
 {
-  EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, N>& arr, T)
+  EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE T run(const array<T, N>& arr, T)
   {
     return array_get<0>(arr);
   }
@@ -377,14 +377,14 @@ struct h_array_reduce<Reducer, T, N, 0>
 template<typename Reducer, typename T>
 struct h_array_reduce<Reducer, T, 0>
 {
-  EIGEN_DEVICE_FUNC constexpr static inline T run(const array<T, 0>&, T identity)
+  EIGEN_DEVICE_FUNC constexpr static EIGEN_STRONG_INLINE T run(const array<T, 0>&, T identity)
   {
     return identity;
   }
 };
 template<typename Reducer, typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array<T, N>& arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr, identity))
+EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_reduce(const array<T, N>& arr, T identity) -> decltype(h_array_reduce<Reducer, T, N>::run(arr, identity))
 {
   return h_array_reduce<Reducer, T, N>::run(arr, identity);
 }
@@ -392,13 +392,13 @@ EIGEN_DEVICE_FUNC constexpr inline auto array_reduce(const array<T, N>& arr, T i
 /* standard array reductions */
 template<typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_sum(const array<T, N>& arr) -> decltype(array_reduce<sum_op, T, N>(arr, static_cast<T>(0)))
+EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_sum(const array<T, N>& arr) -> decltype(array_reduce<sum_op, T, N>(arr, static_cast<T>(0)))
 {
   return array_reduce<sum_op, T, N>(arr, static_cast<T>(0));
 }
 template<typename T, std::size_t N>
-EIGEN_DEVICE_FUNC constexpr inline auto array_prod(const array<T, N>& arr) -> decltype(array_reduce<product_op, T, N>(arr, static_cast<T>(1)))
+EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE auto array_prod(const array<T, N>& arr) -> decltype(array_reduce<product_op, T, N>(arr, static_cast<T>(1)))
 {
   return array_reduce<product_op, T, N>(arr, static_cast<T>(1));
 }
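array_sum and array_prod are the two stock instantiations of array_reduce; array_prod in particular is what the Tensor module uses to turn a dimension array into an element count. A usage sketch, under the same Eigen::internal assumption as above:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::array<int, 3> dims = {{2, 3, 4}};
      std::cout << Eigen::internal::array_prod(dims) << "\n";  // 24 elements
      std::cout << Eigen::internal::array_sum(dims) << "\n";   // 9
      return 0;
    }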
@@ -414,13 +414,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
 /* zip an array */
 template<typename Op, typename A, typename B, std::size_t N, int... n>
-constexpr inline array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>)
+constexpr EIGEN_STRONG_INLINE array<decltype(Op::run(A(), B())),N> h_array_zip(array<A, N> a, array<B, N> b, numeric_list<int, n...>)
 {
   return array<decltype(Op::run(A(), B())),N>{{ Op::run(array_get<n>(a), array_get<n>(b))... }};
 }
 template<typename Op, typename A, typename B, std::size_t N>
-constexpr inline array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, array<B, N> b)
+constexpr EIGEN_STRONG_INLINE array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, array<B, N> b)
 {
   return h_array_zip<Op>(a, b, typename gen_numeric_list<int, N>::type());
 }
@@ -428,13 +428,13 @@ constexpr inline array<decltype(Op::run(A(), B())),N> array_zip(array<A, N> a, a
 /* zip an array and reduce the result */
 template<typename Reducer, typename Op, typename A, typename B, std::size_t N, int... n>
-constexpr inline auto h_array_zip_and_reduce(array<A, N> a, array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...))
+constexpr EIGEN_STRONG_INLINE auto h_array_zip_and_reduce(array<A, N> a, array<B, N> b, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...))
 {
   return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A(), B()))>::type...>::run(Op::run(array_get<n>(a), array_get<n>(b))...);
 }
 template<typename Reducer, typename Op, typename A, typename B, std::size_t N>
-constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()))
+constexpr EIGEN_STRONG_INLINE auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decltype(h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type()))
 {
   return h_array_zip_and_reduce<Reducer, Op, A, B, N>(a, b, typename gen_numeric_list<int, N>::type());
 }
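array_zip_and_reduce composes the two previous primitives: zip the arrays element-wise with Op, then fold the results with Reducer. With product_op as the zipper and sum_op as the reducer this is a compile-time dot product, sketched here under the same namespace assumption:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      using namespace Eigen::internal;
      Eigen::array<int, 3> a = {{1, 2, 3}};
      Eigen::array<int, 3> b = {{4, 5, 6}};
      // 1*4 + 2*5 + 3*6 = 32
      std::cout << array_zip_and_reduce<sum_op, product_op>(a, b) << "\n";
      return 0;
    }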
@@ -442,13 +442,13 @@ constexpr inline auto array_zip_and_reduce(array<A, N> a, array<B, N> b) -> decl
 /* apply stuff to an array */
 template<typename Op, typename A, std::size_t N, int... n>
-constexpr inline array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>)
+constexpr EIGEN_STRONG_INLINE array<decltype(Op::run(A())),N> h_array_apply(array<A, N> a, numeric_list<int, n...>)
 {
   return array<decltype(Op::run(A())),N>{{ Op::run(array_get<n>(a))... }};
 }
 template<typename Op, typename A, std::size_t N>
-constexpr inline array<decltype(Op::run(A())),N> array_apply(array<A, N> a)
+constexpr EIGEN_STRONG_INLINE array<decltype(Op::run(A())),N> array_apply(array<A, N> a)
 {
   return h_array_apply<Op>(a, typename gen_numeric_list<int, N>::type());
 }
@@ -456,13 +456,13 @@ constexpr inline array<decltype(Op::run(A())),N> array_apply(array<A, N> a)
 /* apply stuff to an array and reduce */
 template<typename Reducer, typename Op, typename A, std::size_t N, int... n>
-constexpr inline auto h_array_apply_and_reduce(array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...))
+constexpr EIGEN_STRONG_INLINE auto h_array_apply_and_reduce(array<A, N> arr, numeric_list<int, n...>) -> decltype(reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...))
 {
   return reduce<Reducer, typename id_numeric<int,n,decltype(Op::run(A()))>::type...>::run(Op::run(array_get<n>(arr))...);
 }
 template<typename Reducer, typename Op, typename A, std::size_t N>
-constexpr inline auto array_apply_and_reduce(array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()))
+constexpr EIGEN_STRONG_INLINE auto array_apply_and_reduce(array<A, N> a) -> decltype(h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type()))
 {
   return h_array_apply_and_reduce<Reducer, Op, A, N>(a, typename gen_numeric_list<int, N>::type());
 }
@@ -476,7 +476,7 @@ template<int n>
 struct h_repeat
 {
   template<typename t, int... ii>
-  constexpr static inline array<t, n> run(t v, numeric_list<int, ii...>)
+  constexpr static EIGEN_STRONG_INLINE array<t, n> run(t v, numeric_list<int, ii...>)
   {
     return {{ typename id_numeric<int, ii, t>::type(v)... }};
   }


@@ -395,7 +395,6 @@ void matrix_exp_compute(const ArgType& arg, ResultType &result, false_type) // d
 template<typename Derived> struct MatrixExponentialReturnValue
 : public ReturnByValue<MatrixExponentialReturnValue<Derived> >
 {
-    typedef typename Derived::Index Index;
   public:
     /** \brief Constructor.
      *


@@ -53,7 +53,7 @@ template <typename MatrixType>
 typename NumTraits<typename MatrixType::Scalar>::Real matrix_function_compute_mu(const MatrixType& A)
 {
   typedef typename plain_col_type<MatrixType>::type VectorType;
-  typename MatrixType::Index rows = A.rows();
+  Index rows = A.rows();
   const MatrixType N = MatrixType::Identity(rows, rows) - A;
   VectorType e = VectorType::Ones(rows);
   N.template triangularView<Upper>().solveInPlace(e);
@@ -65,7 +65,6 @@ MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
 {
   // TODO: Use that A is upper triangular
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  typedef typename MatrixType::Index Index;
   Index rows = A.rows();
   Scalar avgEival = A.trace() / Scalar(RealScalar(rows));
   MatrixType Ashifted = A - avgEival * MatrixType::Identity(rows, rows);
@@ -131,7 +130,6 @@ typename ListOfClusters::iterator matrix_function_find_cluster(Index key, ListOf
 template <typename EivalsType, typename Cluster>
 void matrix_function_partition_eigenvalues(const EivalsType& eivals, std::list<Cluster>& clusters)
 {
-  typedef typename EivalsType::Index Index;
   typedef typename EivalsType::RealScalar RealScalar;
   for (Index i=0; i<eivals.rows(); ++i) {
     // Find cluster containing i-th ei'val, adding a new cluster if necessary
@@ -179,7 +177,7 @@ void matrix_function_compute_block_start(const VectorType& clusterSize, VectorTy
 {
   blockStart.resize(clusterSize.rows());
   blockStart(0) = 0;
-  for (typename VectorType::Index i = 1; i < clusterSize.rows(); i++) {
+  for (Index i = 1; i < clusterSize.rows(); i++) {
     blockStart(i) = blockStart(i-1) + clusterSize(i-1);
   }
 }
@@ -188,7 +186,6 @@ void matrix_function_compute_block_start(const VectorType& clusterSize, VectorTy
 template <typename EivalsType, typename ListOfClusters, typename VectorType>
 void matrix_function_compute_map(const EivalsType& eivals, const ListOfClusters& clusters, VectorType& eivalToCluster)
 {
-  typedef typename EivalsType::Index Index;
   eivalToCluster.resize(eivals.rows());
   Index clusterIndex = 0;
   for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) {
@@ -205,7 +202,6 @@ void matrix_function_compute_map(const EivalsType& eivals, const ListOfClusters&
 template <typename DynVectorType, typename VectorType>
 void matrix_function_compute_permutation(const DynVectorType& blockStart, const DynVectorType& eivalToCluster, VectorType& permutation)
 {
-  typedef typename VectorType::Index Index;
   DynVectorType indexNextEntry = blockStart;
   permutation.resize(eivalToCluster.rows());
   for (Index i = 0; i < eivalToCluster.rows(); i++) {
@@ -219,7 +215,6 @@ void matrix_function_compute_permutation(const DynVectorType& blockStart, const
 template <typename VectorType, typename MatrixType>
 void matrix_function_permute_schur(VectorType& permutation, MatrixType& U, MatrixType& T)
 {
-  typedef typename VectorType::Index Index;
   for (Index i = 0; i < permutation.rows() - 1; i++) {
     Index j;
     for (j = i; j < permutation.rows(); j++) {
@@ -247,7 +242,7 @@ template <typename MatrixType, typename AtomicType, typename VectorType>
 void matrix_function_compute_block_atomic(const MatrixType& T, AtomicType& atomic, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT)
 {
   fT.setZero(T.rows(), T.cols());
-  for (typename VectorType::Index i = 0; i < clusterSize.rows(); ++i) {
+  for (Index i = 0; i < clusterSize.rows(); ++i) {
     fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i))
       = atomic.compute(T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)));
   }
@@ -285,7 +280,6 @@ MatrixType matrix_function_solve_triangular_sylvester(const MatrixType& A, const
   eigen_assert(C.rows() == A.rows());
   eigen_assert(C.cols() == B.rows());
-  typedef typename MatrixType::Index Index;
   typedef typename MatrixType::Scalar Scalar;
   Index m = A.rows();
@@ -330,11 +324,8 @@ void matrix_function_compute_above_diagonal(const MatrixType& T, const VectorTyp
 {
   typedef internal::traits<MatrixType> Traits;
   typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::Index Index;
-  static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
-  static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
   static const int Options = MatrixType::Options;
-  typedef Matrix<Scalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+  typedef Matrix<Scalar, Dynamic, Dynamic, Options, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime> DynMatrixType;
   for (Index k = 1; k < clusterSize.rows(); k++) {
     for (Index i = 0; i < clusterSize.rows() - k; i++) {
@@ -481,7 +472,6 @@ template<typename Derived> class MatrixFunctionReturnValue
 {
   public:
     typedef typename Derived::Scalar Scalar;
-    typedef typename Derived::Index Index;
    typedef typename internal::stem_function<Scalar>::type StemFunction;
   protected:
@@ -506,10 +496,8 @@ template<typename Derived> class MatrixFunctionReturnValue
     typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType;
     typedef typename internal::remove_all<NestedEvalType>::type NestedEvalTypeClean;
     typedef internal::traits<NestedEvalTypeClean> Traits;
-    static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
-    static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
     typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
-    typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+    typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime> DynMatrixType;
     typedef internal::MatrixFunctionAtomic<DynMatrixType> AtomicType;
     AtomicType atomic(m_f);
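MatrixFunctionReturnValue is what MatrixBase::matrixFunction() hands back; the stem function it wraps takes a complex scalar and a derivative order. A hedged usage sketch of that entry point:

    #include <unsupported/Eigen/MatrixFunctions>
    #include <complex>
    #include <iostream>

    // Stem function: value of the n-th derivative of f at x.
    // exp is its own derivative, so the order argument is ignored.
    std::complex<double> expfn(std::complex<double> x, int) {
      return std::exp(x);
    }

    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
      std::cout << A.matrixFunction(expfn) << "\n";  // f(A) via Schur-Parlett
      return 0;
    }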


@@ -332,10 +332,8 @@ public:
     typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
     typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
     typedef internal::traits<DerivedEvalTypeClean> Traits;
-    static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
-    static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
     typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
-    typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+    typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, Traits::RowsAtCompileTime, Traits::ColsAtCompileTime> DynMatrixType;
     typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
     AtomicType atomic;


@@ -40,7 +40,6 @@ class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParen
 {
   public:
     typedef typename MatrixType::RealScalar RealScalar;
-    typedef typename MatrixType::Index Index;
     /**
      * \brief Constructor.
@@ -94,7 +93,6 @@ class MatrixPowerAtomic : internal::noncopyable
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::RealScalar RealScalar;
     typedef std::complex<RealScalar> ComplexScalar;
-    typedef typename MatrixType::Index Index;
     typedef Block<MatrixType,Dynamic,Dynamic> ResultType;
     const MatrixType& m_A;
@@ -340,7 +338,6 @@ class MatrixPower : internal::noncopyable
   private:
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::RealScalar RealScalar;
-    typedef typename MatrixType::Index Index;
   public:
     /**
@@ -600,7 +597,6 @@ class MatrixPowerReturnValue : public ReturnByValue< MatrixPowerReturnValue<Deri
   public:
     typedef typename Derived::PlainObject PlainObject;
     typedef typename Derived::RealScalar RealScalar;
-    typedef typename Derived::Index Index;
     /**
      * \brief Constructor.
@@ -648,7 +644,6 @@ class MatrixComplexPowerReturnValue : public ReturnByValue< MatrixComplexPowerRe
   public:
     typedef typename Derived::PlainObject PlainObject;
     typedef typename std::complex<typename Derived::RealScalar> ComplexScalar;
-    typedef typename Derived::Index Index;
     /**
      * \brief Constructor.
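All five MatrixPower classes now rely on the global Eigen::Index instead of a per-matrix Index typedef; the public entry point is unchanged. Usage sketch:

    #include <unsupported/Eigen/MatrixFunctions>
    #include <iostream>

    int main() {
      // A^p for real p goes through MatrixPowerReturnValue / MatrixPower.
      Eigen::Matrix2d A;
      A << 4, 0,
           0, 9;
      std::cout << A.pow(0.5) << "\n";  // principal square root: diag(2, 3)
      return 0;
    }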


@@ -17,7 +17,7 @@ namespace internal {
 // pre:  T.block(i,i,2,2) has complex conjugate eigenvalues
 // post: sqrtT.block(i,i,2,2) is square root of T.block(i,i,2,2)
 template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typename MatrixType::Index i, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, Index i, ResultType& sqrtT)
 {
   // TODO: This case (2-by-2 blocks with complex conjugate eigenvalues) is probably hidden somewhere
   // in EigenSolver. If we expose it, we could call it directly from here.
@@ -32,7 +32,7 @@ void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typena
 //       all blocks of sqrtT to left of and below (i,j) are correct
 // post: sqrtT(i,j) has the correct value
 template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT)
 {
   typedef typename traits<MatrixType>::Scalar Scalar;
   Scalar tmp = (sqrtT.row(i).segment(i+1,j-i-1) * sqrtT.col(j).segment(i+1,j-i-1)).value();
@@ -41,7 +41,7 @@ void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, ty
 // similar to compute1x1offDiagonalBlock()
 template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT)
 {
   typedef typename traits<MatrixType>::Scalar Scalar;
   Matrix<Scalar,1,2> rhs = T.template block<1,2>(i,j);
@@ -54,7 +54,7 @@ void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, ty
 // similar to compute1x1offDiagonalBlock()
 template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT)
 {
   typedef typename traits<MatrixType>::Scalar Scalar;
   Matrix<Scalar,2,1> rhs = T.template block<2,1>(i,j);
@@ -101,7 +101,7 @@ void matrix_sqrt_quasi_triangular_solve_auxiliary_equation(MatrixType& X, const
 // similar to compute1x1offDiagonalBlock()
 template <typename MatrixType, typename ResultType>
-void matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+void matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(const MatrixType& T, Index i, Index j, ResultType& sqrtT)
 {
   typedef typename traits<MatrixType>::Scalar Scalar;
   Matrix<Scalar,2,2> A = sqrtT.template block<2,2>(i,i);
@@ -120,7 +120,6 @@ template <typename MatrixType, typename ResultType>
 void matrix_sqrt_quasi_triangular_diagonal(const MatrixType& T, ResultType& sqrtT)
 {
   using std::sqrt;
-  typedef typename MatrixType::Index Index;
   const Index size = T.rows();
   for (Index i = 0; i < size; i++) {
     if (i == size - 1 || T.coeff(i+1, i) == 0) {
@@ -139,7 +138,6 @@ void matrix_sqrt_quasi_triangular_diagonal(const MatrixType& T, ResultType& sqrt
 template <typename MatrixType, typename ResultType>
 void matrix_sqrt_quasi_triangular_off_diagonal(const MatrixType& T, ResultType& sqrtT)
 {
-  typedef typename MatrixType::Index Index;
   const Index size = T.rows();
   for (Index j = 1; j < size; j++) {
     if (T.coeff(j, j-1) != 0)  // if T(j-1:j, j-1:j) is a 2-by-2 block
@@ -206,8 +204,7 @@ template <typename MatrixType, typename ResultType>
 void matrix_sqrt_triangular(const MatrixType &arg, ResultType &result)
 {
   using std::sqrt;
-  typedef typename MatrixType::Index Index;
   typedef typename MatrixType::Scalar Scalar;
   eigen_assert(arg.rows() == arg.cols());
@@ -318,7 +315,6 @@ template<typename Derived> class MatrixSquareRootReturnValue
 : public ReturnByValue<MatrixSquareRootReturnValue<Derived> >
 {
   protected:
-    typedef typename Derived::Index Index;
     typedef typename internal::ref_selector<Derived>::type DerivedNested;
   public:
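Same cleanup on the square-root side; the user-facing call is MatrixBase::sqrt(), which dispatches to the triangular routines above after a Schur decomposition. Usage sketch:

    #include <unsupported/Eigen/MatrixFunctions>
    #include <iostream>

    int main() {
      Eigen::Matrix2d A;
      A << 2, 1,
           0, 2;
      Eigen::Matrix2d S = A.sqrt();             // principal matrix square root
      std::cout << (S * S - A).norm() << "\n";  // ~0 up to rounding
      return 0;
    }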


@@ -89,13 +89,13 @@ class companion
 {
   const Index deg   = m_monic.size();
   const Index deg_1 = deg-1;
-  DenseCompanionMatrixType companion(deg,deg);
-  companion <<
+  DenseCompanionMatrixType companMat(deg,deg);
+  companMat <<
     ( LeftBlock(deg,deg_1)
       << LeftBlockFirstRow::Zero(1,deg_1),
       BottomLeftBlock::Identity(deg-1,deg-1)*m_bl_diag.asDiagonal() ).finished()
     , m_monic;
-  return companion;
+  return companMat;
 }
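The rename avoids shadowing the class name companion with a local variable. The matrix it builds is the companion matrix whose eigenvalues are the polynomial's roots; the PolynomialSolver front end wraps this. Hedged usage sketch:

    #include <unsupported/Eigen/Polynomials>
    #include <iostream>

    int main() {
      // Coefficients in ascending order: 2 - 3x + x^2 = (x - 1)(x - 2).
      Eigen::Vector3d coeffs(2.0, -3.0, 1.0);
      Eigen::PolynomialSolver<double, 2> solver(coeffs);
      std::cout << solver.roots().transpose() << "\n";  // roots 1 and 2
      return 0;
    }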


@@ -24,7 +24,7 @@ namespace Eigen {
   * \sa Eigen::igammac(), Eigen::lgamma()
   */
 template<typename Derived,typename ExponentDerived>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
 igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
 {
   return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
@@ -47,7 +47,7 @@ igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerive
   * \sa Eigen::igamma(), Eigen::lgamma()
   */
 template <typename Derived, typename ExponentDerived>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
 igamma_der_a(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x) {
   return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_der_a_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
     a.derived(),
@@ -68,7 +68,7 @@ igamma_der_a(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<Exponent
   * \sa Eigen::igamma(), Eigen::lgamma()
   */
 template <typename AlphaDerived, typename SampleDerived>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>
 gamma_sample_der_alpha(const Eigen::ArrayBase<AlphaDerived>& alpha, const Eigen::ArrayBase<SampleDerived>& sample) {
   return Eigen::CwiseBinaryOp<Eigen::internal::scalar_gamma_sample_der_alpha_op<typename AlphaDerived::Scalar>, const AlphaDerived, const SampleDerived>(
     alpha.derived(),
@@ -86,7 +86,7 @@ gamma_sample_der_alpha(const Eigen::ArrayBase<AlphaDerived>& alpha, const Eigen:
   * \sa Eigen::igamma(), Eigen::lgamma()
   */
 template<typename Derived,typename ExponentDerived>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
 igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
 {
   return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
@@ -108,7 +108,7 @@ igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDeriv
 // * \warning Be careful with the order of the parameters: x.polygamma(n) is equivalent to polygamma(n,x)
 // * \sa ArrayBase::polygamma()
 template<typename DerivedN,typename DerivedX>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>
 polygamma(const Eigen::ArrayBase<DerivedN>& n, const Eigen::ArrayBase<DerivedX>& x)
 {
   return Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>(
@@ -128,7 +128,7 @@ polygamma(const Eigen::ArrayBase<DerivedN>& n, const Eigen::ArrayBase<DerivedX>&
   * \sa Eigen::betainc(), Eigen::lgamma()
   */
 template<typename ArgADerived, typename ArgBDerived, typename ArgXDerived>
-inline const Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>
+EIGEN_STRONG_INLINE const Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>
 betainc(const Eigen::ArrayBase<ArgADerived>& a, const Eigen::ArrayBase<ArgBDerived>& b, const Eigen::ArrayBase<ArgXDerived>& x)
 {
   return Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>(
@@ -152,7 +152,7 @@ betainc(const Eigen::ArrayBase<ArgADerived>& a, const Eigen::ArrayBase<ArgBDeriv
   * \sa ArrayBase::zeta()
   */
 template<typename DerivedX,typename DerivedQ>
-inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>
+EIGEN_STRONG_INLINE const Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>
 zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
 {
   return Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>(
@@ -176,7 +176,7 @@ zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
   * \sa ArrayBase::i0e()
   */
 template <typename Derived>
-inline const Eigen::CwiseUnaryOp<
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
     Eigen::internal::scalar_i0e_op<typename Derived::Scalar>, const Derived>
 i0e(const Eigen::ArrayBase<Derived>& x) {
   return Eigen::CwiseUnaryOp<
@@ -199,7 +199,7 @@ i0e(const Eigen::ArrayBase<Derived>& x) {
   * \sa ArrayBase::i1e()
   */
 template <typename Derived>
-inline const Eigen::CwiseUnaryOp<
+EIGEN_STRONG_INLINE const Eigen::CwiseUnaryOp<
     Eigen::internal::scalar_i1e_op<typename Derived::Scalar>, const Derived>
 i1e(const Eigen::ArrayBase<Derived>& x) {
   return Eigen::CwiseUnaryOp<
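Each of these free functions simply wraps its scalar functor in a coefficient-wise expression over the input arrays. Usage sketch for the incomplete-gamma pair:

    #include <unsupported/Eigen/SpecialFunctions>
    #include <iostream>

    int main() {
      Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(3, 1.0, 3.0);
      Eigen::ArrayXd x = Eigen::ArrayXd::Constant(3, 2.0);
      // Regularized lower incomplete gamma and its complement; the two
      // results sum to 1 element-wise.
      std::cout << Eigen::igamma(a, x).transpose() << "\n";
      std::cout << Eigen::igammac(a, x).transpose() << "\n";
      return 0;
    }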


@@ -155,11 +155,11 @@ struct functor_traits<scalar_betainc_op<Scalar> > {
  */
 template<typename Scalar> struct scalar_lgamma_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
     using numext::lgamma; return lgamma(a);
   }
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_lgamma_op<Scalar> >
@@ -177,11 +177,11 @@ struct functor_traits<scalar_lgamma_op<Scalar> >
  */
 template<typename Scalar> struct scalar_digamma_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
     using numext::digamma; return digamma(a);
   }
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_digamma_op<Scalar> >
@@ -199,11 +199,11 @@ struct functor_traits<scalar_digamma_op<Scalar> >
  */
 template<typename Scalar> struct scalar_zeta_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& q) const {
     using numext::zeta; return zeta(x, q);
   }
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_zeta_op<Scalar> >
@@ -221,11 +221,11 @@ struct functor_traits<scalar_zeta_op<Scalar> >
  */
 template<typename Scalar> struct scalar_polygamma_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& n, const Scalar& x) const {
     using numext::polygamma; return polygamma(n, x);
   }
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_polygamma_op<Scalar> >
@@ -244,11 +244,11 @@ struct functor_traits<scalar_polygamma_op<Scalar> >
  */
 template<typename Scalar> struct scalar_erf_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
     using numext::erf; return erf(a);
   }
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perf(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_erf_op<Scalar> >
@@ -267,11 +267,11 @@ struct functor_traits<scalar_erf_op<Scalar> >
  */
 template<typename Scalar> struct scalar_erfc_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
     using numext::erfc; return erfc(a);
   }
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perfc(a); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_erfc_op<Scalar> >
@@ -291,12 +291,12 @@ struct functor_traits<scalar_erfc_op<Scalar> >
 template <typename Scalar>
 struct scalar_i0e_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_i0e_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
     using numext::i0e;
     return i0e(x);
   }
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
     return internal::pi0e(x);
   }
 };
@@ -318,12 +318,12 @@ struct functor_traits<scalar_i0e_op<Scalar> > {
 template <typename Scalar>
 struct scalar_i1e_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_i1e_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& x) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& x) const {
     using numext::i1e;
     return i1e(x);
  }
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
     return internal::pi1e(x);
   }
 };
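The same operator()/packetOp shape works for user-defined functors, and only functors whose traits declare PacketAccess actually take the vectorized path. A hedged sketch with a made-up scalar_cube_op (not part of Eigen):

    #include <Eigen/Core>
    #include <iostream>

    template <typename Scalar>
    struct scalar_cube_op {
      EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
      operator()(const Scalar& a) const { return a * a * a; }
      // Optional vectorized path, mirroring the packetOp methods above.
      template <typename Packet>
      EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const {
        return Eigen::internal::pmul(a, Eigen::internal::pmul(a, a));
      }
    };

    int main() {
      Eigen::ArrayXf v = Eigen::ArrayXf::LinSpaced(4, 1.f, 4.f);
      std::cout << v.unaryExpr(scalar_cube_op<float>()).transpose() << "\n";
      return 0;
    }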


@@ -61,14 +61,14 @@ template <typename T>
 void RandomFill(std::vector<T> & vec)
 {
   for (size_t k=0;k<vec.size();++k)
-    vec[k] = T( rand() )/T(RAND_MAX) - .5;
+    vec[k] = T( rand() )/T(RAND_MAX) - T(.5);
 }
 template <typename T>
 void RandomFill(std::vector<std::complex<T> > & vec)
 {
   for (size_t k=0;k<vec.size();++k)
-    vec[k] = std::complex<T> ( T( rand() )/T(RAND_MAX) - .5, T( rand() )/T(RAND_MAX) - .5);
+    vec[k] = std::complex<T> ( T( rand() )/T(RAND_MAX) - T(.5), T( rand() )/T(RAND_MAX) - T(.5));
 }
 template <typename T_time,typename T_freq>
@@ -85,7 +85,7 @@ void fwd_inv(size_t nfft)
   vector<T_time> timebuf2;
   fft.inv(timebuf2,freqbuf);
-  long double rmse = mag2(timebuf - timebuf2) / mag2(timebuf);
+  T_time rmse = mag2(timebuf - timebuf2) / mag2(timebuf);
   cout << "roundtrip rmse: " << rmse << endl;
 }
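The rmse change keeps the error metric in the transform's own scalar type instead of promoting to long double, matching the T(.5) fixes above. The roundtrip the test exercises looks like this in user code:

    #include <unsupported/Eigen/FFT>
    #include <complex>
    #include <iostream>
    #include <vector>

    int main() {
      Eigen::FFT<float> fft;
      std::vector<float> timebuf(16, 1.0f);
      std::vector<std::complex<float> > freqbuf;
      fft.fwd(freqbuf, timebuf);   // time -> frequency
      std::vector<float> timebuf2;
      fft.inv(timebuf2, freqbuf);  // frequency -> time, should match timebuf
      std::cout << "roundtrip sample: " << timebuf2[0] << "\n";
      return 0;
    }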


@@ -18,7 +18,7 @@ static void test_create_destroy_empty_pool()
   // Just create and destroy the pool. This will wind up and tear down worker
   // threads. Ensure there are no issues in that logic.
   for (int i = 0; i < 16; ++i) {
-    NonBlockingThreadPool tp(i);
+    ThreadPool tp(i);
   }
 }
@@ -27,7 +27,7 @@ static void test_parallelism(bool allow_spinning)
 {
   // Test we never-ever fail to match available tasks with idle threads.
   const int kThreads = 16;  // code below expects that this is a multiple of 4
-  NonBlockingThreadPool tp(kThreads, allow_spinning);
+  ThreadPool tp(kThreads, allow_spinning);
   VERIFY_IS_EQUAL(tp.NumThreads(), kThreads);
   VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1);
   for (int iter = 0; iter < 100; ++iter) {
@@ -104,7 +104,7 @@ static void test_parallelism(bool allow_spinning)
 static void test_cancel()
 {
-  NonBlockingThreadPool tp(2);
+  ThreadPool tp(2);
   // Schedule a large number of closures that each sleep for one second. This
   // will keep the thread pool busy for much longer than the default test timeout.
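The tests now spell the pool type as ThreadPool, which at this point aliases the non-blocking implementation. A hedged usage sketch of the same API outside the test harness:

    #include <unsupported/Eigen/CXX11/ThreadPool>
    #include <atomic>
    #include <iostream>

    int main() {
      Eigen::ThreadPool tp(4);              // 4 worker threads
      std::atomic<int> counter(0);
      for (int i = 0; i < 100; ++i)
        tp.Schedule([&counter] { counter.fetch_add(1); });
      while (counter.load() < 100) {}       // crude wait; prefer a Barrier in real code
      std::cout << counter.load() << "\n";  // 100
      return 0;
    }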