Merged eigen/eigen into default

This commit is contained in:
Benoit Steiner 2016-05-03 13:15:00 -07:00
commit 4c05fb03a3
29 changed files with 320 additions and 137 deletions

View File

@ -256,12 +256,13 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
enum { enum {
outer = Index / DstXprType::InnerSizeAtCompileTime, outer = Index / DstXprType::InnerSizeAtCompileTime,
inner = Index % DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime,
JointAlignment = Kernel::AssignmentTraits::JointAlignment JointAlignment = Kernel::AssignmentTraits::JointAlignment,
DefaultAlignment = unpacket_traits<PacketType>::alignment
}; };
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{ {
kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner); kernel.template assignPacketByOuterInner<DefaultAlignment, JointAlignment, PacketType>(outer, inner);
enum { NextIndex = Index + unpacket_traits<PacketType>::size }; enum { NextIndex = Index + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel); copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
} }
@ -277,9 +278,12 @@ template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling struct copy_using_evaluator_innervec_InnerUnrolling
{ {
typedef typename Kernel::PacketType PacketType; typedef typename Kernel::PacketType PacketType;
enum {
DefaultAlignment = unpacket_traits<PacketType>::alignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
{ {
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_); kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, Index_);
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size }; enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer); copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
} }
@ -433,6 +437,9 @@ template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{ {
typedef typename Kernel::PacketType PacketType; typedef typename Kernel::PacketType PacketType;
enum {
DefaultAlignment = unpacket_traits<PacketType>::alignment
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
{ {
const Index innerSize = kernel.innerSize(); const Index innerSize = kernel.innerSize();
@ -440,7 +447,7 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
const Index packetSize = unpacket_traits<PacketType>::size; const Index packetSize = unpacket_traits<PacketType>::size;
for(Index outer = 0; outer < outerSize; ++outer) for(Index outer = 0; outer < outerSize; ++outer)
for(Index inner = 0; inner < innerSize; inner+=packetSize) for(Index inner = 0; inner < innerSize; inner+=packetSize)
kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner); kernel.template assignPacketByOuterInner<DefaultAlignment, DefaultAlignment, PacketType>(outer, inner);
} }
}; };

View File

@ -46,6 +46,8 @@
// Make our own __half definition that is similar to CUDA's. // Make our own __half definition that is similar to CUDA's.
struct __half { struct __half {
__half() {}
explicit __half(unsigned short raw) : x(raw) {}
unsigned short x; unsigned short x;
}; };
@ -292,7 +294,8 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff)
const FP32 f16max = { (127 + 16) << 23 }; const FP32 f16max = { (127 + 16) << 23 };
const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 }; const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
unsigned int sign_mask = 0x80000000u; unsigned int sign_mask = 0x80000000u;
__half o = { 0 }; __half o;
o.x = static_cast<unsigned short>(0x0u);
unsigned int sign = f.u & sign_mask; unsigned int sign = f.u & sign_mask;
f.u ^= sign; f.u ^= sign;

View File

@ -375,7 +375,7 @@
#define EIGEN_HAS_CONSTEXPR 1 #define EIGEN_HAS_CONSTEXPR 1
#endif #endif
#elif __has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ #elif __has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
EIGEN_GNUC_AT_LEAST(4,8) (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))
#define EIGEN_HAS_CONSTEXPR 1 #define EIGEN_HAS_CONSTEXPR 1
#endif #endif

View File

@ -36,7 +36,7 @@ struct traits<TensorAssignOp<LhsXprType, RhsXprType> >
static const int Layout = internal::traits<LhsXprType>::Layout; static const int Layout = internal::traits<LhsXprType>::Layout;
enum { enum {
Flags = 0, Flags = 0
}; };
}; };
@ -100,7 +100,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned, IsAligned = TensorEvaluator<LeftArgType, Device>::IsAligned & TensorEvaluator<RightArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess, PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
Layout = TensorEvaluator<LeftArgType, Device>::Layout, Layout = TensorEvaluator<LeftArgType, Device>::Layout,
RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess, RawAccess = TensorEvaluator<LeftArgType, Device>::RawAccess
}; };
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) :

View File

@ -41,7 +41,7 @@ struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >
static const int Layout = traits<LhsXprType>::Layout; static const int Layout = traits<LhsXprType>::Layout;
enum { enum {
Flags = 0, Flags = 0
}; };
}; };
@ -588,7 +588,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
enum { enum {
Layout = TensorEvaluator<LeftArgType, Device>::Layout, Layout = TensorEvaluator<LeftArgType, Device>::Layout
}; };
// Most of the code is assuming that both input tensors are ColMajor. If the // Most of the code is assuming that both input tensors are ColMajor. If the

View File

@ -543,12 +543,12 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
#define prefetch_lhs(reg, row, col) \ #define prefetch_lhs(reg, row, col) \
if (!CHECK_LHS_BOUNDARY) { \ if (!CHECK_LHS_BOUNDARY) { \
if (col < k_size) { \ if (col < k_size) { \
reg =lhs.loadPacket(row, col); \ reg =lhs.loadPacket<Unaligned>(row, col); \
} \ } \
} else { \ } else { \
if (col < k_size) { \ if (col < k_size) { \
if (row + 3 < m_size) { \ if (row + 3 < m_size) { \
reg =lhs.loadPacket(row, col); \ reg =lhs.loadPacket<Unaligned>(row, col); \
} else if (row + 2 < m_size) { \ } else if (row + 2 < m_size) { \
reg.x =lhs(row + 0, col); \ reg.x =lhs(row + 0, col); \
reg.y =lhs(row + 1, col); \ reg.y =lhs(row + 1, col); \
@ -578,7 +578,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
if (!CHECK_RHS_BOUNDARY) { if (!CHECK_RHS_BOUNDARY) {
if ((rhs_vert + 3) < k_size) { if ((rhs_vert + 3) < k_size) {
// just CHECK_RHS_BOUNDARY // just CHECK_RHS_BOUNDARY
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
} else if (rhs_vert + 2 < k_size) { } else if (rhs_vert + 2 < k_size) {
// just CHECK_RHS_BOUNDARY // just CHECK_RHS_BOUNDARY
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
@ -593,7 +593,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
} else { } else {
if (rhs_horiz0 < n_size) { if (rhs_horiz0 < n_size) {
if ((rhs_vert + 3) < k_size) { if ((rhs_vert + 3) < k_size) {
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
} else if ((rhs_vert + 2) < k_size) { } else if ((rhs_vert + 2) < k_size) {
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
@ -790,37 +790,37 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
if (!CHECK_LHS_BOUNDARY) { if (!CHECK_LHS_BOUNDARY) {
if ((threadIdx.y/4+k+24) < k_size) { if ((threadIdx.y/4+k+24) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); lhs_pf3 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+24));
} else if ((threadIdx.y/4+k+16) < k_size) { } else if ((threadIdx.y/4+k+16) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
} else if ((threadIdx.y/4+k+8) < k_size) { } else if ((threadIdx.y/4+k+8) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
} else if ((threadIdx.y/4+k) < k_size) { } else if ((threadIdx.y/4+k) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
} }
} else { } else {
// just CHECK_LHS_BOUNDARY // just CHECK_LHS_BOUNDARY
if (lhs_vert + 3 < m_size) { if (lhs_vert + 3 < m_size) {
if ((threadIdx.y/4+k+24) < k_size) { if ((threadIdx.y/4+k+24) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); lhs_pf3 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+24));
} else if ((threadIdx.y/4+k+16) < k_size) { } else if ((threadIdx.y/4+k+16) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
} else if ((threadIdx.y/4+k+8) < k_size) { } else if ((threadIdx.y/4+k+8) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
} else if ((threadIdx.y/4+k) < k_size) { } else if ((threadIdx.y/4+k) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
} }
} else if (lhs_vert + 2 < m_size) { } else if (lhs_vert + 2 < m_size) {
if ((threadIdx.y/4+k+24) < k_size) { if ((threadIdx.y/4+k+24) < k_size) {
@ -909,8 +909,8 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
if (!CHECK_RHS_BOUNDARY) { if (!CHECK_RHS_BOUNDARY) {
if ((rhs_vert + 3) < k_size) { if ((rhs_vert + 3) < k_size) {
// just CHECK_RHS_BOUNDARY // just CHECK_RHS_BOUNDARY
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); rhs_pf1 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz1);
} else if (rhs_vert + 2 < k_size) { } else if (rhs_vert + 2 < k_size) {
// just CHECK_RHS_BOUNDARY // just CHECK_RHS_BOUNDARY
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
@ -932,8 +932,8 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
if (rhs_horiz1 < n_size) { if (rhs_horiz1 < n_size) {
if ((rhs_vert + 3) < k_size) { if ((rhs_vert + 3) < k_size) {
// just CHECK_RHS_BOUNDARY // just CHECK_RHS_BOUNDARY
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); rhs_pf1 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz1);
} else if (rhs_vert + 2 < k_size) { } else if (rhs_vert + 2 < k_size) {
// just CHECK_RHS_BOUNDARY // just CHECK_RHS_BOUNDARY
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
@ -954,7 +954,7 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
} else if (rhs_horiz0 < n_size) { } else if (rhs_horiz0 < n_size) {
if ((rhs_vert + 3) < k_size) { if ((rhs_vert + 3) < k_size) {
// just CHECK_RHS_BOUNDARY // just CHECK_RHS_BOUNDARY
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
} else if ((rhs_vert + 2) < k_size) { } else if ((rhs_vert + 2) < k_size) {
// just CHECK_RHS_BOUNDARY // just CHECK_RHS_BOUNDARY
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);

View File

@ -16,7 +16,7 @@ namespace internal {
enum { enum {
Rhs = 0, Rhs = 0,
Lhs = 1, Lhs = 1
}; };
/* /*
@ -233,7 +233,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar,
typedef typename Tensor::PacketReturnType Packet; typedef typename Tensor::PacketReturnType Packet;
typedef typename unpacket_traits<Packet>::half HalfPacket; typedef typename unpacket_traits<Packet>::half HalfPacket;
template <int AlignmentType = Alignment> template <int AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const {
// whole method makes column major assumption // whole method makes column major assumption
@ -276,7 +276,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar,
return pload<Packet>(data); return pload<Packet>(data);
} }
template <int AlignmentType = Alignment> template <int AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
// whole method makes column major assumption // whole method makes column major assumption

View File

@ -233,7 +233,7 @@ struct traits<TensorConvolutionOp<Dimensions, InputXprType, KernelXprType> >
static const int Layout = traits<InputXprType>::Layout; static const int Layout = traits<InputXprType>::Layout;
enum { enum {
Flags = 0, Flags = 0
}; };
}; };

View File

@ -34,25 +34,25 @@ class TensorOpCost {
template <typename ArgType> template <typename ArgType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int MulCost() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int MulCost() {
return internal::functor_traits< return internal::functor_traits<
internal::scalar_product_op<ArgType, ArgType>>::Cost; internal::scalar_product_op<ArgType, ArgType> >::Cost;
} }
template <typename ArgType> template <typename ArgType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int AddCost() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int AddCost() {
return internal::functor_traits<internal::scalar_sum_op<ArgType>>::Cost; return internal::functor_traits<internal::scalar_sum_op<ArgType> >::Cost;
} }
template <typename ArgType> template <typename ArgType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int DivCost() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int DivCost() {
return internal::functor_traits< return internal::functor_traits<
internal::scalar_quotient_op<ArgType, ArgType>>::Cost; internal::scalar_quotient_op<ArgType, ArgType> >::Cost;
} }
template <typename ArgType> template <typename ArgType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int ModCost() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int ModCost() {
return internal::functor_traits<internal::scalar_mod_op<ArgType>>::Cost; return internal::functor_traits<internal::scalar_mod_op<ArgType> >::Cost;
} }
template <typename SrcType, typename TargetType> template <typename SrcType, typename TargetType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int CastCost() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int CastCost() {
return internal::functor_traits< return internal::functor_traits<
internal::scalar_cast_op<SrcType, TargetType>>::Cost; internal::scalar_cast_op<SrcType, TargetType> >::Cost;
} }
TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {}

View File

@ -275,7 +275,7 @@ struct DSizes : array<DenseIndex, NumDims> {
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const {
return internal::array_prod(*static_cast<const Base*>(this)); return (NumDims == 0) ? 1 : internal::array_prod(*static_cast<const Base*>(this));
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() {

View File

@ -34,7 +34,7 @@ struct traits<TensorEvalToOp<XprType> >
static const int Layout = XprTraits::Layout; static const int Layout = XprTraits::Layout;
enum { enum {
Flags = 0, Flags = 0
}; };
}; };

View File

@ -40,7 +40,7 @@ struct traits<TensorCwiseNullaryOp<NullaryOp, XprType> >
static const int Layout = XprTraits::Layout; static const int Layout = XprTraits::Layout;
enum { enum {
Flags = 0, Flags = 0
}; };
}; };
@ -163,7 +163,7 @@ struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
static const int Layout = XprTraits::Layout; static const int Layout = XprTraits::Layout;
enum { enum {
Flags = 0, Flags = 0
}; };
}; };

View File

@ -128,7 +128,6 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
return m_storage.data()[0]; return m_storage.data()[0];
} }
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
template<typename... IndexTypes> template<typename... IndexTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
@ -137,8 +136,54 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); return this->operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
} }
#else
// Read-only coefficient access with two indices (i0, i1).
// The linear offset into the storage buffer is i1 + i0 * dim[1] when the
// RowMajor bit is set in Options, and i0 + i1 * dim[0] otherwise.
// No range check is performed on the indices here.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
{
  const Index offset = (Options&RowMajor)
      ? i1 + i0 * m_storage.dimensions()[1]
      : i0 + i1 * m_storage.dimensions()[0];
  return m_storage.data()[offset];
}
// Read-only coefficient access with three indices (i0, i1, i2).
// With the RowMajor bit set in Options the last index varies fastest
// (i2 + dim[2] * (i1 + dim[1] * i0)); otherwise the first index varies
// fastest (i0 + dim[0] * (i1 + dim[1] * i2)).
// No range check is performed on the indices here.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
{
  const Index offset = (Options&RowMajor)
      ? i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)
      : i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2);
  return m_storage.data()[offset];
}
// Read-only coefficient access with four indices (i0, i1, i2, i3).
// With the RowMajor bit set in Options the last index varies fastest;
// otherwise the first index varies fastest. The offset is accumulated in
// nested (Horner-like) form using the dimensions held by m_storage.
// No range check is performed on the indices here.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
{
  const Index offset = (Options&RowMajor)
      ? i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))
      : i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3));
  return m_storage.data()[offset];
}
// Read-only coefficient access with five indices (i0 .. i4).
// With the RowMajor bit set in Options the last index varies fastest;
// otherwise the first index varies fastest. The offset is accumulated in
// nested (Horner-like) form using the dimensions held by m_storage.
// No range check is performed on the indices here.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
{
  const Index offset = (Options&RowMajor)
      ? i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)))
      : i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4)));
  return m_storage.data()[offset];
}
#endif #endif
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, NumIndices>& indices) const
{ {
@ -176,6 +221,51 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}}); return operator()(array<Index, NumIndices>{{firstIndex, otherIndices...}});
} }
#else
// Writable coefficient access with two indices (i0, i1).
// The linear offset into the storage buffer is i1 + i0 * dim[1] when the
// RowMajor bit is set in Options, and i0 + i1 * dim[0] otherwise.
// No range check is performed on the indices here.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
{
  const Index offset = (Options&RowMajor)
      ? i1 + i0 * m_storage.dimensions()[1]
      : i0 + i1 * m_storage.dimensions()[0];
  return m_storage.data()[offset];
}
// Writable coefficient access with three indices (i0, i1, i2).
// With the RowMajor bit set in Options the last index varies fastest
// (i2 + dim[2] * (i1 + dim[1] * i0)); otherwise the first index varies
// fastest (i0 + dim[0] * (i1 + dim[1] * i2)).
// No range check is performed on the indices here.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
{
  const Index offset = (Options&RowMajor)
      ? i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)
      : i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2);
  return m_storage.data()[offset];
}
// Writable coefficient access with four indices (i0, i1, i2, i3).
// With the RowMajor bit set in Options the last index varies fastest;
// otherwise the first index varies fastest. The offset is accumulated in
// nested (Horner-like) form using the dimensions held by m_storage.
// No range check is performed on the indices here.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
{
  const Index offset = (Options&RowMajor)
      ? i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))
      : i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3));
  return m_storage.data()[offset];
}
// Writable coefficient access with five indices (i0 .. i4).
// With the RowMajor bit set in Options the last index varies fastest;
// otherwise the first index varies fastest. The offset is accumulated in
// nested (Horner-like) form using the dimensions held by m_storage.
// No range check is performed on the indices here.
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
{
  const Index offset = (Options&RowMajor)
      ? i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)))
      : i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4)));
  return m_storage.data()[offset];
}
#endif #endif
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC

View File

@ -34,7 +34,7 @@ struct traits<TensorForcedEvalOp<XprType> >
static const int Layout = XprTraits::Layout; static const int Layout = XprTraits::Layout;
enum { enum {
Flags = 0, Flags = 0
}; };
}; };

View File

@ -181,7 +181,7 @@ template<typename ArgType, typename Device>
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor, Layout = (static_cast<int>(TensorEvaluator<ArgType, Device>::Layout) == static_cast<int>(ColMajor)) ? RowMajor : ColMajor,
CoordAccess = false, // to be implemented CoordAccess = false // to be implemented
}; };
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)

View File

@ -40,7 +40,7 @@ class compute_tensor_flags
}; };
public: public:
enum { ret = packet_access_bit}; enum { ret = packet_access_bit };
}; };
@ -54,7 +54,7 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum { enum {
Options = Options_, Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit), Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0 : LvalueBit)
}; };
}; };
@ -69,7 +69,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor; static const int Layout = Options_ & RowMajor ? RowMajor : ColMajor;
enum { enum {
Options = Options_, Options = Options_,
Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit), Flags = compute_tensor_flags<Scalar_, Options_>::ret | (is_const<Scalar_>::value ? 0: LvalueBit)
}; };
}; };
@ -86,7 +86,7 @@ struct traits<TensorMap<PlainObjectType, Options_> >
static const int Layout = BaseTraits::Layout; static const int Layout = BaseTraits::Layout;
enum { enum {
Options = Options_, Options = Options_,
Flags = BaseTraits::Flags, Flags = BaseTraits::Flags
}; };
}; };
@ -102,7 +102,7 @@ struct traits<TensorRef<PlainObjectType> >
static const int Layout = BaseTraits::Layout; static const int Layout = BaseTraits::Layout;
enum { enum {
Options = BaseTraits::Options, Options = BaseTraits::Options,
Flags = BaseTraits::Flags, Flags = BaseTraits::Flags
}; };
}; };
@ -253,7 +253,7 @@ struct nested<const TensorRef<PlainObjectType> >
// Pc=0. // Pc=0.
typedef enum { typedef enum {
PADDING_VALID = 1, PADDING_VALID = 1,
PADDING_SAME = 2, PADDING_SAME = 2
} PaddingType; } PaddingType;
} // end namespace Eigen } // end namespace Eigen

View File

@ -535,7 +535,7 @@ InstType instantiate_by_c_array(ArrType* arr)
#else // Non C++11, fallback to emulation mode #else // Non C++11, fallback to emulation mode
#include "src/Core/util/EmulateCXX11Meta.h" #include "EmulateCXX11Meta.h"
#endif #endif

View File

@ -41,7 +41,7 @@ class MaxSizeVector {
// Construct a new MaxSizeVector, reserve and resize to n. // Construct a new MaxSizeVector, reserve and resize to n.
// Copy the init value to all elements. // Copy the init value to all elements.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit MaxSizeVector(size_t n, const T& init) MaxSizeVector(size_t n, const T& init)
: reserve_(n), size_(n), : reserve_(n), size_(n),
data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); } for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); }

View File

@ -110,35 +110,48 @@ ei_add_test(minres)
ei_add_test(levenberg_marquardt) ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product) ei_add_test(kronecker_product)
# TODO: The following test names are prefixed with the cxx11 string, since historically
# the tests depended on c++11. This isn't the case anymore so we ought to rename them.
ei_add_test(cxx11_float16)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_empty)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_io)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
# This test requires __uint128_t which is only available on 64bit systems
ei_add_test(cxx11_tensor_uint128)
endif()
if(EIGEN_TEST_CXX11) if(EIGEN_TEST_CXX11)
# It should be safe to always run these tests as there is some fallback code for # It should be safe to always run these tests as there is some fallback code for
# older compilers that don't support cxx11. # older compilers that don't support cxx11.
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
ei_add_test(cxx11_float16)
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_meta) ei_add_test(cxx11_meta)
ei_add_test(cxx11_tensor_simple) ei_add_test(cxx11_tensor_simple)
# ei_add_test(cxx11_tensor_symmetry) # ei_add_test(cxx11_tensor_symmetry)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_index_list) ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_mixed_indices) ei_add_test(cxx11_tensor_mixed_indices)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_contraction) ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution) ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_expr) ei_add_test(cxx11_tensor_expr)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_fixed_size) ei_add_test(cxx11_tensor_fixed_size)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_of_const_values) ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_complex) ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_strings) ei_add_test(cxx11_tensor_of_strings)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_lvalue) ei_add_test(cxx11_tensor_lvalue)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_broadcasting) ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_chipping) ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_concatenation) ei_add_test(cxx11_tensor_concatenation)
@ -156,23 +169,11 @@ if(EIGEN_TEST_CXX11)
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_ref) ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_random) ei_add_test(cxx11_tensor_random)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_reverse)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_generator) ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_custom_op) ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_custom_index) ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_fft) ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_ifft) ei_add_test(cxx11_tensor_ifft)
ei_add_test(cxx11_tensor_empty)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
# This test requires __uint128_t which is only available on 64bit systems
ei_add_test(cxx11_tensor_uint128)
endif()
endif() endif()

View File

@ -31,9 +31,9 @@ void test_conversion()
VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002); VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
// Verify round-to-nearest-even behavior. // Verify round-to-nearest-even behavior.
float val1 = float(half(__half{0x3c00})); float val1 = float(half(__half(0x3c00)));
float val2 = float(half(__half{0x3c01})); float val2 = float(half(__half(0x3c01)));
float val3 = float(half(__half{0x3c02})); float val3 = float(half(__half(0x3c02)));
VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00); VERIFY_IS_EQUAL(half(0.5 * (val1 + val2)).x, 0x3c00);
VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02); VERIFY_IS_EQUAL(half(0.5 * (val2 + val3)).x, 0x3c02);
@ -49,21 +49,21 @@ void test_conversion()
VERIFY_IS_EQUAL(half(true).x, 0x3c00); VERIFY_IS_EQUAL(half(true).x, 0x3c00);
// Conversion to float. // Conversion to float.
VERIFY_IS_EQUAL(float(half(__half{0x0000})), 0.0f); VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f);
VERIFY_IS_EQUAL(float(half(__half{0x3c00})), 1.0f); VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f);
// Denormals. // Denormals.
VERIFY_IS_APPROX(float(half(__half{0x8001})), -5.96046e-08f); VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f);
VERIFY_IS_APPROX(float(half(__half{0x0001})), 5.96046e-08f); VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f);
VERIFY_IS_APPROX(float(half(__half{0x0002})), 1.19209e-07f); VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f);
// NaNs and infinities. // NaNs and infinities.
VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number. VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number.
VERIFY(!(numext::isnan)(float(half(0.0f)))); VERIFY(!(numext::isnan)(float(half(0.0f))));
VERIFY((numext::isinf)(float(half(__half{0xfc00})))); VERIFY((numext::isinf)(float(half(__half(0xfc00)))));
VERIFY((numext::isnan)(float(half(__half{0xfc01})))); VERIFY((numext::isnan)(float(half(__half(0xfc01)))));
VERIFY((numext::isinf)(float(half(__half{0x7c00})))); VERIFY((numext::isinf)(float(half(__half(0x7c00)))));
VERIFY((numext::isnan)(float(half(__half{0x7c01})))); VERIFY((numext::isnan)(float(half(__half(0x7c01)))));
#if !EIGEN_COMP_MSVC #if !EIGEN_COMP_MSVC
// Visual Studio errors out on divisions by 0 // Visual Studio errors out on divisions by 0
@ -73,12 +73,12 @@ void test_conversion()
#endif #endif
// Exactly same checks as above, just directly on the half representation. // Exactly same checks as above, just directly on the half representation.
VERIFY(!(numext::isinf)(half(__half{0x7bff}))); VERIFY(!(numext::isinf)(half(__half(0x7bff))));
VERIFY(!(numext::isnan)(half(__half{0x0000}))); VERIFY(!(numext::isnan)(half(__half(0x0000))));
VERIFY((numext::isinf)(half(__half{0xfc00}))); VERIFY((numext::isinf)(half(__half(0xfc00))));
VERIFY((numext::isnan)(half(__half{0xfc01}))); VERIFY((numext::isnan)(half(__half(0xfc01))));
VERIFY((numext::isinf)(half(__half{0x7c00}))); VERIFY((numext::isinf)(half(__half(0x7c00))));
VERIFY((numext::isnan)(half(__half{0x7c01}))); VERIFY((numext::isnan)(half(__half(0x7c01))));
#if !EIGEN_COMP_MSVC #if !EIGEN_COMP_MSVC
// Visual Studio errors out on divisions by 0 // Visual Studio errors out on divisions by 0

View File

@ -64,7 +64,7 @@ static void test_argmax_tuple_reducer()
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced; Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims; DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce( reduced = index_tuples.reduce(
dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>()); dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 0, DataLayout> maxi = tensor.maximum(); Tensor<float, 0, DataLayout> maxi = tensor.maximum();
@ -74,7 +74,7 @@ static void test_argmax_tuple_reducer()
for (int d = 0; d < 3; ++d) reduce_dims[d] = d; for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7); Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce( reduced_by_dims = index_tuples.reduce(
reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float>>()); reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims); Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims);
@ -96,7 +96,7 @@ static void test_argmin_tuple_reducer()
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced; Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims; DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce( reduced = index_tuples.reduce(
dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>()); dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 0, DataLayout> mini = tensor.minimum(); Tensor<float, 0, DataLayout> mini = tensor.minimum();
@ -106,7 +106,7 @@ static void test_argmin_tuple_reducer()
for (int d = 0; d < 3; ++d) reduce_dims[d] = d; for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7); Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce( reduced_by_dims = index_tuples.reduce(
reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float>>()); reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims); Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims);

View File

@ -37,7 +37,6 @@ static void test_fixed_size()
VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7); VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7);
} }
static void test_match() static void test_match()
{ {
Eigen::DSizes<int, 3> dyn(2,3,7); Eigen::DSizes<int, 3> dyn(2,3,7);
@ -49,10 +48,22 @@ static void test_match()
VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false); VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false);
} }
// Checks dimension objects that have no dimensions at all: both an
// Eigen::Sizes<> with an empty argument list and an Eigen::DSizes with a
// dimension count of 0 are expected to report a total size of 1 and a rank of 0.
static void test_rank_zero()
{
// Sizes<> instantiated with no size arguments.
Eigen::Sizes<> scalar;
VERIFY_IS_EQUAL(scalar.TotalSize(), 1);
VERIFY_IS_EQUAL(scalar.rank(), 0);
// The product taken over zero dimensions is expected to be 1 as well.
VERIFY_IS_EQUAL(internal::array_prod(scalar), 1);
// Same expectations for DSizes with ptrdiff_t indices and 0 dimensions.
Eigen::DSizes<ptrdiff_t, 0> dscalar;
VERIFY_IS_EQUAL(dscalar.TotalSize(), 1);
VERIFY_IS_EQUAL(dscalar.rank(), 0);
}
void test_cxx11_tensor_dimension() void test_cxx11_tensor_dimension()
{ {
CALL_SUBTEST(test_dynamic_size()); CALL_SUBTEST(test_dynamic_size());
CALL_SUBTEST(test_fixed_size()); CALL_SUBTEST(test_fixed_size());
CALL_SUBTEST(test_match()); CALL_SUBTEST(test_match());
CALL_SUBTEST(test_rank_zero());
} }

View File

@ -24,10 +24,10 @@ static void test_empty_tensor()
static void test_empty_fixed_size_tensor() static void test_empty_fixed_size_tensor()
{ {
TensorFixedSize<float, Sizes<0>> source; TensorFixedSize<float, Sizes<0> > source;
TensorFixedSize<float, Sizes<0>> tgt1 = source; TensorFixedSize<float, Sizes<0> > tgt1 = source;
TensorFixedSize<float, Sizes<0>> tgt2(source); TensorFixedSize<float, Sizes<0> > tgt2(source);
TensorFixedSize<float, Sizes<0>> tgt3; TensorFixedSize<float, Sizes<0> > tgt3;
tgt3 = tgt1; tgt3 = tgt1;
tgt3 = tgt2; tgt3 = tgt2;
} }

View File

@ -130,9 +130,9 @@ static void test_tensor_map()
static void test_2d() static void test_2d()
{ {
float data1[6]; float data1[6];
TensorMap<TensorFixedSize<float, Sizes<2, 3> >> mat1(data1,2,3); TensorMap<TensorFixedSize<float, Sizes<2, 3> > > mat1(data1,2,3);
float data2[6]; float data2[6];
TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor>> mat2(data2,2,3); TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor> > mat2(data2,2,3);
VERIFY_IS_EQUAL((mat1.size()), 2*3); VERIFY_IS_EQUAL((mat1.size()), 2*3);
VERIFY_IS_EQUAL(mat1.rank(), 2); VERIFY_IS_EQUAL(mat1.rank(), 2);
@ -153,7 +153,7 @@ static void test_2d()
mat2(1,1) = -4.0; mat2(1,1) = -4.0;
mat2(1,2) = -5.0; mat2(1,2) = -5.0;
TensorFixedSize<float, Sizes<2, 3>> mat3; TensorFixedSize<float, Sizes<2, 3> > mat3;
TensorFixedSize<float, Sizes<2, 3>, RowMajor> mat4; TensorFixedSize<float, Sizes<2, 3>, RowMajor> mat4;
mat3 = mat1.abs(); mat3 = mat1.abs();
mat4 = mat2.abs(); mat4 = mat2.abs();

View File

@ -22,14 +22,15 @@ static void test_simple()
m1.setRandom(); m1.setRandom();
m2.setRandom(); m2.setRandom();
TensorMap<Tensor<float, 2>> mat1(m1.data(), 3,3); TensorMap<Tensor<float, 2> > mat1(m1.data(), 3,3);
TensorMap<Tensor<float, 2>> mat2(m2.data(), 3,3); TensorMap<Tensor<float, 2> > mat2(m2.data(), 3,3);
Tensor<float, 2> mat3(3,3); Tensor<float, 2> mat3(3,3);
mat3 = mat1; mat3 = mat1;
typedef Tensor<float, 1>::DimensionPair DimPair; typedef Tensor<float, 1>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims({{DimPair(1, 0)}}); Eigen::array<DimPair, 1> dims;
dims[0] = DimPair(1, 0);
mat3 = mat3.contract(mat2, dims).eval(); mat3 = mat3.contract(mat2, dims).eval();
@ -60,7 +61,7 @@ static void test_const()
Eigen::array<int, 2> bcast; Eigen::array<int, 2> bcast;
bcast[0] = 3; bcast[0] = 3;
bcast[1] = 1; bcast[1] = 1;
const TensorMap<Tensor<const float, 2>> input_tensor(input.data(), 3, 3); const TensorMap<Tensor<const float, 2> > input_tensor(input.data(), 3, 3);
Tensor<float, 2> output_tensor= (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); Tensor<float, 2> output_tensor= (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {

View File

@ -19,8 +19,8 @@ static void test_0d()
Tensor<int, 0> scalar1; Tensor<int, 0> scalar1;
Tensor<int, 0, RowMajor> scalar2; Tensor<int, 0, RowMajor> scalar2;
TensorMap<Tensor<const int, 0>> scalar3(scalar1.data()); TensorMap<Tensor<const int, 0> > scalar3(scalar1.data());
TensorMap<Tensor<const int, 0, RowMajor>> scalar4(scalar2.data()); TensorMap<Tensor<const int, 0, RowMajor> > scalar4(scalar2.data());
scalar1() = 7; scalar1() = 7;
scalar2() = 13; scalar2() = 13;
@ -37,8 +37,8 @@ static void test_1d()
Tensor<int, 1> vec1(6); Tensor<int, 1> vec1(6);
Tensor<int, 1, RowMajor> vec2(6); Tensor<int, 1, RowMajor> vec2(6);
TensorMap<Tensor<const int, 1>> vec3(vec1.data(), 6); TensorMap<Tensor<const int, 1> > vec3(vec1.data(), 6);
TensorMap<Tensor<const int, 1, RowMajor>> vec4(vec2.data(), 6); TensorMap<Tensor<const int, 1, RowMajor> > vec4(vec2.data(), 6);
vec1(0) = 4; vec2(0) = 0; vec1(0) = 4; vec2(0) = 0;
vec1(1) = 8; vec2(1) = 1; vec1(1) = 8; vec2(1) = 1;
@ -85,8 +85,8 @@ static void test_2d()
mat2(1,1) = 4; mat2(1,1) = 4;
mat2(1,2) = 5; mat2(1,2) = 5;
TensorMap<Tensor<const int, 2>> mat3(mat1.data(), 2, 3); TensorMap<Tensor<const int, 2> > mat3(mat1.data(), 2, 3);
TensorMap<Tensor<const int, 2, RowMajor>> mat4(mat2.data(), 2, 3); TensorMap<Tensor<const int, 2, RowMajor> > mat4(mat2.data(), 2, 3);
VERIFY_IS_EQUAL(mat3.rank(), 2); VERIFY_IS_EQUAL(mat3.rank(), 2);
VERIFY_IS_EQUAL(mat3.size(), 6); VERIFY_IS_EQUAL(mat3.size(), 6);
@ -129,8 +129,8 @@ static void test_3d()
} }
} }
TensorMap<Tensor<const int, 3>> mat3(mat1.data(), 2, 3, 7); TensorMap<Tensor<const int, 3> > mat3(mat1.data(), 2, 3, 7);
TensorMap<Tensor<const int, 3, RowMajor>> mat4(mat2.data(), array<DenseIndex, 3>{{2, 3, 7}}); TensorMap<Tensor<const int, 3, RowMajor> > mat4(mat2.data(), 2, 3, 7);
VERIFY_IS_EQUAL(mat3.rank(), 3); VERIFY_IS_EQUAL(mat3.rank(), 3);
VERIFY_IS_EQUAL(mat3.size(), 2*3*7); VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
@ -173,8 +173,8 @@ static void test_from_tensor()
} }
} }
TensorMap<Tensor<int, 3>> mat3(mat1); TensorMap<Tensor<int, 3> > mat3(mat1);
TensorMap<Tensor<int, 3, RowMajor>> mat4(mat2); TensorMap<Tensor<int, 3, RowMajor> > mat4(mat2);
VERIFY_IS_EQUAL(mat3.rank(), 3); VERIFY_IS_EQUAL(mat3.rank(), 3);
VERIFY_IS_EQUAL(mat3.size(), 2*3*7); VERIFY_IS_EQUAL(mat3.size(), 2*3*7);
@ -199,19 +199,23 @@ static void test_from_tensor()
} }
} }
TensorFixedSize<int, Sizes<2,3,7>> mat5; TensorFixedSize<int, Sizes<2,3,7> > mat5;
val = 0; val = 0;
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) { for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) { for (int k = 0; k < 7; ++k) {
mat5(i,j,k) = val; array<ptrdiff_t, 3> coords;
coords[0] = i;
coords[1] = j;
coords[2] = k;
mat5(coords) = val;
val++; val++;
} }
} }
} }
TensorMap<TensorFixedSize<int, Sizes<2,3,7>>> mat6(mat5); TensorMap<TensorFixedSize<int, Sizes<2,3,7> > > mat6(mat5);
VERIFY_IS_EQUAL(mat6.rank(), 3); VERIFY_IS_EQUAL(mat6.rank(), 3);
VERIFY_IS_EQUAL(mat6.size(), 2*3*7); VERIFY_IS_EQUAL(mat6.size(), 2*3*7);
@ -233,8 +237,8 @@ static void test_from_tensor()
static int f(const TensorMap<Tensor<int, 3> >& tensor) { static int f(const TensorMap<Tensor<int, 3> >& tensor) {
// Size<0> empty; // Size<0> empty;
EIGEN_STATIC_ASSERT((internal::array_size<Sizes<>>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((internal::array_size<Sizes<> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0>>::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
Tensor<int, 0> result = tensor.sum(); Tensor<int, 0> result = tensor.sum();
return result(); return result();
} }
@ -253,7 +257,7 @@ static void test_casting()
} }
} }
TensorMap<Tensor<int, 3>> map(tensor); TensorMap<Tensor<int, 3> > map(tensor);
int sum1 = f(map); int sum1 = f(map);
int sum2 = f(tensor); int sum2 = f(tensor);

View File

@ -134,6 +134,68 @@ void test_cuda_elementwise() {
gpu_device.deallocate(d_res_float); gpu_device.deallocate(d_res_float);
} }
// Compares exp() and log() evaluated on float data through a GpuDevice with
// the same operations evaluated after casting the data to Eigen::half (the
// half results are cast back to float before being stored). The two result
// sets are copied to the host and checked element by element with
// VERIFY_IS_APPROX.
// NOTE: the function name is spelled "trancendental" (sic); the call in
// test_cxx11_tensor_of_float16_cuda uses the same spelling, so both must be
// renamed together if the spelling is ever corrected.
void test_cuda_trancendental() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
// Six device buffers of num_elem floats each: two inputs, and for each input
// one buffer for the half-based result and one for the float-based result.
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res1_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res1_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res2_half = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res2_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
// Wrap each raw device buffer in a rank-1 TensorMap so tensor expressions
// can read from and write to it.
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res1_half(
d_res1_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res1_float(
d_res1_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res2_half(
d_res2_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res2_float(
d_res2_float, num_elem);
// Fill both inputs with random values.
// NOTE(review): the value range of random() is not visible here; if it can
// produce 0 (or negative values) the log() reference below would be -inf/NaN
// — confirm the range before relying on the log comparison.
gpu_float1.device(gpu_device) = gpu_float1.random();
gpu_float2.device(gpu_device) = gpu_float2.random();
// Reference results computed directly on the float inputs.
gpu_res1_float.device(gpu_device) = gpu_float1.exp();
gpu_res2_float.device(gpu_device) = gpu_float2.log();
// Results under test: cast to Eigen::half, apply the function, cast back to
// float so they can be stored in (and compared as) float buffers.
gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().exp().cast<float>();
gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>().log().cast<float>();
// Host-side tensors receiving the inputs and both result sets.
Tensor<float, 1> input1(num_elem);
Tensor<float, 1> half_prec1(num_elem);
Tensor<float, 1> full_prec1(num_elem);
Tensor<float, 1> input2(num_elem);
Tensor<float, 1> half_prec2(num_elem);
Tensor<float, 1> full_prec2(num_elem);
gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(float));
// Synchronize before the host reads the copied data; presumably the
// evaluations and copies above are queued asynchronously — TODO confirm.
gpu_device.synchronize();
// exp(): print each input/result triple, then require the half-based result
// to be approximately equal to the float-based one.
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise exp " << i << " input = " << input1(i) << " full = " << full_prec1(i) << " half = " << half_prec1(i) << std::endl;
VERIFY_IS_APPROX(full_prec1(i), half_prec1(i));
}
// log(): same print-and-compare pass on the second input.
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
}
// Release every device buffer allocated at the top of the function.
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res1_half);
gpu_device.deallocate(d_res1_float);
gpu_device.deallocate(d_res2_half);
gpu_device.deallocate(d_res2_float);
}
void test_cuda_contractions() { void test_cuda_contractions() {
Eigen::CudaStreamDevice stream; Eigen::CudaStreamDevice stream;
@ -280,6 +342,7 @@ void test_cxx11_tensor_of_float16_cuda()
CALL_SUBTEST_1(test_cuda_conversion()); CALL_SUBTEST_1(test_cuda_conversion());
CALL_SUBTEST_1(test_cuda_unary()); CALL_SUBTEST_1(test_cuda_unary());
CALL_SUBTEST_1(test_cuda_elementwise()); CALL_SUBTEST_1(test_cuda_elementwise());
CALL_SUBTEST_1(test_cuda_trancendental());
CALL_SUBTEST_2(test_cuda_contractions()); CALL_SUBTEST_2(test_cuda_contractions());
CALL_SUBTEST_3(test_cuda_reductions()); CALL_SUBTEST_3(test_cuda_reductions());
CALL_SUBTEST_4(test_cuda_forced_evals()); CALL_SUBTEST_4(test_cuda_forced_evals());

View File

@ -195,7 +195,10 @@ static void test_3d()
VERIFY_IS_EQUAL((epsilon(0,2,1)), -1); VERIFY_IS_EQUAL((epsilon(0,2,1)), -1);
VERIFY_IS_EQUAL((epsilon(1,0,2)), -1); VERIFY_IS_EQUAL((epsilon(1,0,2)), -1);
array<Eigen::DenseIndex, 3> dims{{2,3,4}}; array<Eigen::DenseIndex, 3> dims;
dims[0] = 2;
dims[1] = 3;
dims[2] = 4;
Tensor<int, 3> t1(dims); Tensor<int, 3> t1(dims);
Tensor<int, 3, RowMajor> t2(dims); Tensor<int, 3, RowMajor> t2(dims);