Extend support for Packet16b:

* Add ptranspose<*,4> to support matmul, and add a unit test for Matrix<bool> * Matrix<bool>.
* Work around a bug in slicing of Tensor<bool>.
* Add tensor tests.

This speeds up matmul for boolean matrices by roughly 7x for sizes of 32 and above (the 8x8 case regresses); a minimal usage sketch follows the benchmark table.

name                            old time/op             new time/op             delta
BM_MatMul<bool>/8                267ns ± 0%              479ns ± 0%  +79.25%          (p=0.008 n=5+5)
BM_MatMul<bool>/32              6.42µs ± 0%             0.87µs ± 0%  -86.50%          (p=0.008 n=5+5)
BM_MatMul<bool>/64              43.3µs ± 0%              5.9µs ± 0%  -86.42%          (p=0.008 n=5+5)
BM_MatMul<bool>/128              315µs ± 0%               44µs ± 0%  -85.98%          (p=0.008 n=5+5)
BM_MatMul<bool>/256             2.41ms ± 0%             0.34ms ± 0%  -85.68%          (p=0.008 n=5+5)
BM_MatMul<bool>/512             18.8ms ± 0%              2.7ms ± 0%  -85.53%          (p=0.008 n=5+5)
BM_MatMul<bool>/1k               149ms ± 0%               22ms ± 0%  -85.40%          (p=0.008 n=5+5)
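
For context, a minimal usage sketch of the boolean product this change accelerates (hypothetical sizes; it mirrors the reference loop in the product_small test below, where C(i,j) is the OR over k of A(i,k) AND B(k,j)):

#include <Eigen/Dense>
#include <iostream>

int main() {
  using BoolMatrix = Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic>;
  BoolMatrix A(128, 128), B(128, 128);
  A.setRandom();
  B.setRandom();
  // Boolean matmul: C(i,j) = OR_k (A(i,k) AND B(k,j)), now vectorized via Packet16b.
  BoolMatrix C = A * B;
  std::cout << C.count() << " true entries\n";
  return 0;
}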
Rasmus Munk Larsen 2020-04-24 17:29:25 -07:00
parent b47c777993
commit ab773c7e91
10 changed files with 267 additions and 162 deletions


@ -179,6 +179,9 @@ preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); }
/** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
padd(const Packet& a, const Packet& b) { return a+b; }
// Avoid compiler warning for boolean algebra.
template<> EIGEN_DEVICE_FUNC inline bool
padd(const bool& a, const bool& b) { return a || b; }
/** \internal \returns a - b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
@ -196,6 +199,9 @@ pconj(const Packet& a) { return numext::conj(a); }
/** \internal \returns a * b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pmul(const Packet& a, const Packet& b) { return a*b; }
// Avoid compiler warning for boolean algebra.
template<> EIGEN_DEVICE_FUNC inline bool
pmul(const bool& a, const bool& b) { return a && b; }
/** \internal \returns a / b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
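
A scalar-level sketch of what the bool specializations above compute (illustrative only; padd/pmul live in Eigen::internal):

#include <Eigen/Core>
#include <cassert>

int main() {
  bool a = true, b = false;
  // For bool, packet "addition" is logical OR and "multiplication" is logical AND,
  // sidestepping the compiler warning mentioned in the comments above.
  assert(Eigen::internal::padd(a, b) == (a || b));
  assert(Eigen::internal::pmul(a, b) == (a && b));
  return 0;
}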


@ -170,10 +170,10 @@ template<> struct packet_traits<bool> : default_packet_traits
HasHalfPacket = 0,
size=16,
HasAdd = 0,
HasAdd = 1,
HasSub = 0,
HasShift = 0,
HasMul = 0,
HasMul = 1,
HasNegate = 0,
HasAbs = 0,
HasAbs2 = 0,
@ -249,6 +249,8 @@ template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
template<> EIGEN_STRONG_INLINE Packet16b padd<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_or_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_sub_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_sub_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_sub_epi32(a,b); }
@ -290,6 +292,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const
#endif
}
template<> EIGEN_STRONG_INLINE Packet16b pmul<Packet16b>(const Packet16b& a, const Packet16b& b) { return _mm_and_si128(a,b); }
template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_div_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_div_pd(a,b); }
@ -646,6 +650,7 @@ template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { return _mm_cvtss_f32(a); }
template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { return _mm_cvtsd_f64(a); }
template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { return _mm_cvtsi128_si32(a); }
template<> EIGEN_STRONG_INLINE bool pfirst<Packet16b>(const Packet16b& a) { int x = _mm_cvtsi128_si32(a); return static_cast<bool>(x & 1); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
@ -762,6 +767,7 @@ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
Packet4i tmp0 = _mm_hadd_epi32(a,a);
return pfirst<Packet4i>(_mm_hadd_epi32(tmp0,tmp0));
}
#else
template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
{
@ -769,8 +775,22 @@ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
}
#endif
#ifdef EIGEN_VECTORIZE_SSE4_1
template<> EIGEN_STRONG_INLINE bool predux<Packet16b>(const Packet16b& a) {
Packet16b tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a));
return _mm_extract_epi64(tmp, 0) != 0;
}
#else
template<> EIGEN_STRONG_INLINE bool predux<Packet16b>(const Packet16b& a) {
Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a,a));
return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0);
}
#endif
// Other reduction functions:
// mul
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
{
@ -987,6 +1007,19 @@ ptranspose(PacketBlock<Packet4i,4>& kernel) {
kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
}
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet16b,4>& kernel) {
__m128i T0 = _mm_unpacklo_epi8(kernel.packet[0], kernel.packet[1]);
__m128i T1 = _mm_unpackhi_epi8(kernel.packet[0], kernel.packet[1]);
__m128i T2 = _mm_unpacklo_epi8(kernel.packet[2], kernel.packet[3]);
__m128i T3 = _mm_unpackhi_epi8(kernel.packet[2], kernel.packet[3]);
kernel.packet[0] = _mm_unpacklo_epi16(T0, T2);
kernel.packet[1] = _mm_unpackhi_epi16(T0, T2);
kernel.packet[2] = _mm_unpacklo_epi16(T1, T3);
kernel.packet[3] = _mm_unpackhi_epi16(T1, T3);
}
template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
const __m128i zero = _mm_setzero_si128();
const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
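
The Packet16b ptranspose above interleaves bytes, then 16-bit pairs; read as 64 contiguous bytes, the four output packets hold the 16x4 transpose of the 4x16 input block. A scalar sketch of the equivalent operation (hypothetical reference helper, not part of the patch):

// in[i][k] is lane k of input packet i; out, flattened, satisfies out_flat[4*k + i] == in[i][k].
void ptranspose16b_ref(const bool in[4][16], bool out[4][16]) {
  for (int k = 0; k < 16; ++k)
    for (int i = 0; i < 4; ++i)
      out[k / 4][4 * (k % 4) + i] = in[i][k];
}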


@ -39,12 +39,12 @@ struct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar>
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::padd(a,b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const
{ return internal::predux(a); }
};
template<typename LhsScalar,typename RhsScalar>
@ -56,15 +56,9 @@ struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
};
};
/** \internal
* \brief Template specialization to deprecate the summation of boolean expressions.
* This is required to solve Bug 426.
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
*/
template<> struct scalar_sum_op<bool,bool> : scalar_sum_op<int,int> {
EIGEN_DEPRECATED
scalar_sum_op() {}
};
template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool scalar_sum_op<bool,bool>::operator() (const bool& a, const bool& b) const { return a || b; }
/** \internal
@ -83,12 +77,12 @@ struct scalar_product_op : binary_op_base<LhsScalar,RhsScalar>
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmul(a,b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const
{ return internal::predux_mul(a); }
};
template<typename LhsScalar,typename RhsScalar>
@ -100,6 +94,10 @@ struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
};
};
template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool scalar_product_op<bool,bool>::operator() (const bool& a, const bool& b) const { return a && b; }
/** \internal
* \brief Template functor to compute the conjugate product of two scalars
*
@ -116,11 +114,11 @@ struct scalar_conj_product_op : binary_op_base<LhsScalar,RhsScalar>
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const
{ return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
{ return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
};
template<typename LhsScalar,typename RhsScalar>
@ -141,12 +139,12 @@ struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmin(a,b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const
{ return internal::predux_min(a); }
};
template<typename LhsScalar,typename RhsScalar>
@ -167,12 +165,12 @@ struct scalar_max_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmax(a,b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const
{ return internal::predux_max(a); }
};
template<typename LhsScalar,typename RhsScalar>
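
With the specializations above, scalar_sum_op<bool,bool> and scalar_product_op<bool,bool> reduce with || and &&, so a sum over a bool expression behaves like any(). A minimal sketch, assuming these functors are selected by the usual redux path:

#include <Eigen/Dense>

// Hypothetical helper: true if any entry is true in both matrices.
bool any_pair_true(const Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic>& a,
                   const Eigen::Matrix<bool, Eigen::Dynamic, Eigen::Dynamic>& b) {
  // sum() now reduces bool with logical OR via scalar_sum_op<bool,bool>.
  return (a.array() && b.array()).sum();
}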


@ -56,6 +56,31 @@ test_lazy_single(int rows, int cols, int depth)
VERIFY_IS_APPROX(C+=A.lazyProduct(B), ref_prod(D,A,B));
}
template<typename T>
void test_dynamic_exact()
{
int rows = internal::random<int>(1,64);
int cols = internal::random<int>(1,64);
int depth = internal::random<int>(1,65);
typedef Matrix<T,Dynamic,Dynamic> MatrixX;
MatrixX A(rows,depth); A.setRandom();
MatrixX B(depth,cols); B.setRandom();
MatrixX C(rows,cols); C.setRandom();
MatrixX D(C);
for(Index i=0;i<C.rows();++i)
for(Index j=0;j<C.cols();++j)
for(Index k=0;k<A.cols();++k)
D.coeffRef(i,j) |= A.coeff(i,k) & B.coeff(k,j);
C += A * B;
VERIFY_IS_EQUAL(C, D);
MatrixX E = B.transpose();
for(Index i=0;i<B.rows();++i)
for(Index j=0;j<B.cols();++j)
VERIFY_IS_EQUAL(B(i,j), E(j,i));
}
template<typename T, int Rows, int Cols, int Depth, int OC, int OA, int OB>
typename internal::enable_if< ( (Rows ==1&&Depth!=1&&OA==ColMajor)
|| (Depth==1&&Rows !=1&&OA==RowMajor)
@ -291,6 +316,8 @@ EIGEN_DECLARE_TEST(product_small)
CALL_SUBTEST_6( bug_1311<3>() );
CALL_SUBTEST_6( bug_1311<5>() );
CALL_SUBTEST_9( test_dynamic_exact<bool>() );
}
CALL_SUBTEST_6( product_small_regressions<0>() );


@ -456,7 +456,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
// slice offsets and sizes.
IsAligned = false,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess &&
// FIXME: Temporary workaround for bug in slicing of bool tensors.
!internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value,
PreferBlockAccess = true,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
@ -525,7 +527,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL);
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
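
A sketch of the kind of bool-tensor slice this workaround affects (hypothetical shapes, assuming the tensor is large enough for the offsets and extents); with BlockAccess disabled for bool, the slice is evaluated through the regular packet/scalar path instead of the buggy block path:

#include <unsupported/Eigen/CXX11/Tensor>

Eigen::Tensor<bool, 3> slice_example(const Eigen::Tensor<bool, 3>& t) {
  Eigen::array<Eigen::Index, 3> offsets = {{1, 0, 2}};
  Eigen::array<Eigen::Index, 3> extents = {{2, t.dimension(1), 3}};
  return t.slice(offsets, extents);  // no longer routed through the block evaluator for bool
}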


@ -233,7 +233,7 @@ static void test_eval_tensor_binary_expr_block() {
rhs.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(
lhs + rhs, [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
lhs * rhs, [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}
template <typename T, int NumDims, int Layout>
@ -274,7 +274,7 @@ static void test_eval_tensor_broadcast() {
// Check that desc.destination() memory is not shared between two broadcast
// materializations.
VerifyBlockEvaluator<T, NumDims, Layout>(
input.broadcast(bcast) + input.square().broadcast(bcast),
input.broadcast(bcast) * input.square().broadcast(bcast),
[&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });
}
@ -509,7 +509,7 @@ static void test_eval_tensor_reshape_with_bcast() {
DSizes<Index, 2> dims(dim, dim);
VerifyBlockEvaluator<T, 2, Layout>(
lhs.reshape(reshapeLhs).broadcast(bcastLhs) +
lhs.reshape(reshapeLhs).broadcast(bcastLhs) *
rhs.reshape(reshapeRhs).broadcast(bcastRhs),
[dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
}
@ -529,11 +529,11 @@ static void test_eval_tensor_forced_eval() {
DSizes<Index, 2> dims(dim, dim);
VerifyBlockEvaluator<T, 2, Layout>(
(lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims),
(lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims),
[dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
VerifyBlockEvaluator<T, 2, Layout>(
(lhs.broadcast(bcastLhs) + rhs.broadcast(bcastRhs)).eval().reshape(dims),
(lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims),
[dims]() { return RandomBlock<Layout, 2>(dims, 1, 50); });
}
@ -755,6 +755,38 @@ static void test_assign_to_tensor_shuffle() {
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
#define CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(PART, NAME) \
CALL_SUBTEST_PART(PART)((NAME<float, 1, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 2, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 3, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 4, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 5, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 1, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 2, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 5, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 1, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 2, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 3, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 4, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 5, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 1, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 2, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<int, 5, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 1, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 2, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 3, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 4, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 5, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 1, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 2, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, 5, ColMajor>()))
#define CALL_SUBTESTS_DIMS_LAYOUTS(PART, NAME) \
CALL_SUBTEST_PART(PART)((NAME<float, 1, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 2, RowMajor>())); \
@ -767,36 +799,38 @@ static void test_assign_to_tensor_shuffle() {
CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, 5, ColMajor>()))
#define CALL_SUBTESTS_LAYOUTS(PART, NAME) \
#define CALL_SUBTESTS_LAYOUTS_TYPES(PART, NAME) \
CALL_SUBTEST_PART(PART)((NAME<float, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, ColMajor>()))
CALL_SUBTEST_PART(PART)((NAME<float, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, ColMajor>()))
EIGEN_DECLARE_TEST(cxx11_tensor_block_eval) {
// clang-format off
CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_block);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_block);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_binary_expr_block);
CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_unary_expr_block);
CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_binary_expr_block);
CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_binary_with_unary_expr_block);
CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_broadcast);
CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_reshape);
CALL_SUBTESTS_DIMS_LAYOUTS(3, test_eval_tensor_cast);
CALL_SUBTESTS_DIMS_LAYOUTS(3, test_eval_tensor_select);
CALL_SUBTESTS_DIMS_LAYOUTS(3, test_eval_tensor_padding);
CALL_SUBTESTS_DIMS_LAYOUTS(4, test_eval_tensor_chipping);
CALL_SUBTESTS_DIMS_LAYOUTS(4, test_eval_tensor_generator);
CALL_SUBTESTS_DIMS_LAYOUTS(4, test_eval_tensor_reverse);
CALL_SUBTESTS_DIMS_LAYOUTS(5, test_eval_tensor_slice);
CALL_SUBTESTS_DIMS_LAYOUTS(5, test_eval_tensor_shuffle);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_broadcast);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_reshape);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_cast);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_select);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_padding);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_chipping);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_generator);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_reverse);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_slice);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_shuffle);
CALL_SUBTESTS_LAYOUTS(6, test_eval_tensor_reshape_with_bcast);
CALL_SUBTESTS_LAYOUTS(6, test_eval_tensor_forced_eval);
CALL_SUBTESTS_LAYOUTS(6, test_eval_tensor_chipping_of_bcast);
CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_reshape_with_bcast);
CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_forced_eval);
CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_chipping_of_bcast);
CALL_SUBTESTS_DIMS_LAYOUTS(7, test_assign_to_tensor);
CALL_SUBTESTS_DIMS_LAYOUTS(7, test_assign_to_tensor_reshape);
CALL_SUBTESTS_DIMS_LAYOUTS(7, test_assign_to_tensor_chipping);
CALL_SUBTESTS_DIMS_LAYOUTS(8, test_assign_to_tensor_slice);
CALL_SUBTESTS_DIMS_LAYOUTS(8, test_assign_to_tensor_shuffle);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_reshape);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_chipping);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_slice);
CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_shuffle);
// Force CMake to split this test.
// EIGEN_SUFFIXES;1;2;3;4;5;6;7;8


@ -415,7 +415,15 @@ static void test_block_io_squeeze_ones() {
CALL_SUBTEST((NAME<float, 1, ColMajor>())); \
CALL_SUBTEST((NAME<float, 2, ColMajor>())); \
CALL_SUBTEST((NAME<float, 4, ColMajor>())); \
CALL_SUBTEST((NAME<float, 5, ColMajor>()))
CALL_SUBTEST((NAME<float, 5, ColMajor>())); \
CALL_SUBTEST((NAME<bool, 1, RowMajor>())); \
CALL_SUBTEST((NAME<bool, 2, RowMajor>())); \
CALL_SUBTEST((NAME<bool, 4, RowMajor>())); \
CALL_SUBTEST((NAME<bool, 5, RowMajor>())); \
CALL_SUBTEST((NAME<bool, 1, ColMajor>())); \
CALL_SUBTEST((NAME<bool, 2, ColMajor>())); \
CALL_SUBTEST((NAME<bool, 4, ColMajor>())); \
CALL_SUBTEST((NAME<bool, 5, ColMajor>()))
EIGEN_DECLARE_TEST(cxx11_tensor_block_io) {
// clang-format off


@ -562,36 +562,40 @@ static void test_large_contraction_with_output_kernel() {
EIGEN_DECLARE_TEST(cxx11_tensor_contraction)
{
CALL_SUBTEST(test_evals<ColMajor>());
CALL_SUBTEST(test_evals<RowMajor>());
CALL_SUBTEST(test_scalar<ColMajor>());
CALL_SUBTEST(test_scalar<RowMajor>());
CALL_SUBTEST(test_multidims<ColMajor>());
CALL_SUBTEST(test_multidims<RowMajor>());
CALL_SUBTEST(test_holes<ColMajor>());
CALL_SUBTEST(test_holes<RowMajor>());
CALL_SUBTEST(test_full_redux<ColMajor>());
CALL_SUBTEST(test_full_redux<RowMajor>());
CALL_SUBTEST(test_contraction_of_contraction<ColMajor>());
CALL_SUBTEST(test_contraction_of_contraction<RowMajor>());
CALL_SUBTEST(test_expr<ColMajor>());
CALL_SUBTEST(test_expr<RowMajor>());
CALL_SUBTEST(test_out_of_order_contraction<ColMajor>());
CALL_SUBTEST(test_out_of_order_contraction<RowMajor>());
CALL_SUBTEST(test_consistency<ColMajor>());
CALL_SUBTEST(test_consistency<RowMajor>());
CALL_SUBTEST(test_large_contraction<ColMajor>());
CALL_SUBTEST(test_large_contraction<RowMajor>());
CALL_SUBTEST(test_matrix_vector<ColMajor>());
CALL_SUBTEST(test_matrix_vector<RowMajor>());
CALL_SUBTEST(test_tensor_vector<ColMajor>());
CALL_SUBTEST(test_tensor_vector<RowMajor>());
CALL_SUBTEST(test_small_blocking_factors<ColMajor>());
CALL_SUBTEST(test_small_blocking_factors<RowMajor>());
CALL_SUBTEST(test_tensor_product<ColMajor>());
CALL_SUBTEST(test_tensor_product<RowMajor>());
CALL_SUBTEST(test_const_inputs<ColMajor>());
CALL_SUBTEST(test_const_inputs<RowMajor>());
CALL_SUBTEST(test_large_contraction_with_output_kernel<ColMajor>());
CALL_SUBTEST(test_large_contraction_with_output_kernel<RowMajor>());
CALL_SUBTEST_1(test_evals<ColMajor>());
CALL_SUBTEST_1(test_evals<RowMajor>());
CALL_SUBTEST_1(test_scalar<ColMajor>());
CALL_SUBTEST_1(test_scalar<RowMajor>());
CALL_SUBTEST_2(test_multidims<ColMajor>());
CALL_SUBTEST_2(test_multidims<RowMajor>());
CALL_SUBTEST_2(test_holes<ColMajor>());
CALL_SUBTEST_2(test_holes<RowMajor>());
CALL_SUBTEST_3(test_full_redux<ColMajor>());
CALL_SUBTEST_3(test_full_redux<RowMajor>());
CALL_SUBTEST_3(test_contraction_of_contraction<ColMajor>());
CALL_SUBTEST_3(test_contraction_of_contraction<RowMajor>());
CALL_SUBTEST_4(test_expr<ColMajor>());
CALL_SUBTEST_4(test_expr<RowMajor>());
CALL_SUBTEST_4(test_out_of_order_contraction<ColMajor>());
CALL_SUBTEST_4(test_out_of_order_contraction<RowMajor>());
CALL_SUBTEST_5(test_consistency<ColMajor>());
CALL_SUBTEST_5(test_consistency<RowMajor>());
CALL_SUBTEST_5(test_large_contraction<ColMajor>());
CALL_SUBTEST_5(test_large_contraction<RowMajor>());
CALL_SUBTEST_6(test_matrix_vector<ColMajor>());
CALL_SUBTEST_6(test_matrix_vector<RowMajor>());
CALL_SUBTEST_6(test_tensor_vector<ColMajor>());
CALL_SUBTEST_6(test_tensor_vector<RowMajor>());
CALL_SUBTEST_7(test_small_blocking_factors<ColMajor>());
CALL_SUBTEST_7(test_small_blocking_factors<RowMajor>());
CALL_SUBTEST_7(test_tensor_product<ColMajor>());
CALL_SUBTEST_7(test_tensor_product<RowMajor>());
CALL_SUBTEST_8(test_const_inputs<ColMajor>());
CALL_SUBTEST_8(test_const_inputs<RowMajor>());
CALL_SUBTEST_8(test_large_contraction_with_output_kernel<ColMajor>());
CALL_SUBTEST_8(test_large_contraction_with_output_kernel<RowMajor>());
// Force CMake to split this test.
// EIGEN_SUFFIXES;1;2;3;4;5;6;7;8
}


@ -195,26 +195,23 @@ static void test_constants()
static void test_boolean()
{
Tensor<int, 1> vec(31);
std::iota(vec.data(), vec.data() + 31, 0);
const int kSize = 31;
Tensor<int, 1> vec(kSize);
std::iota(vec.data(), vec.data() + kSize, 0);
// Test ||.
Tensor<bool, 1> bool1 = vec < vec.constant(1) || vec > vec.constant(4);
VERIFY_IS_EQUAL(bool1[0], true);
VERIFY_IS_EQUAL(bool1[1], false);
VERIFY_IS_EQUAL(bool1[2], false);
VERIFY_IS_EQUAL(bool1[3], false);
VERIFY_IS_EQUAL(bool1[4], false);
VERIFY_IS_EQUAL(bool1[5], true);
for (int i = 0; i < kSize; ++i) {
bool expected = i < 1 || i > 4;
VERIFY_IS_EQUAL(bool1[i], expected);
}
// Test &&, including cast of operand vec.
Tensor<bool, 1> bool2 = vec.cast<bool>() && vec < vec.constant(4);
VERIFY_IS_EQUAL(bool2[0], false);
VERIFY_IS_EQUAL(bool2[1], true);
VERIFY_IS_EQUAL(bool2[2], true);
VERIFY_IS_EQUAL(bool2[3], true);
VERIFY_IS_EQUAL(bool2[4], false);
VERIFY_IS_EQUAL(bool2[5], false);
for (int i = 0; i < kSize; ++i) {
bool expected = bool(i) && i < 4;
VERIFY_IS_EQUAL(bool2[i], expected);
}
// Compilation tests:
// Test Tensor<bool> against results of cast or comparison; verifies that


@ -113,19 +113,19 @@ static void test_reshape_as_lvalue()
}
}
template<int DataLayout>
template<typename T, int DataLayout>
static void test_simple_slice()
{
Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
Tensor<T, 5, DataLayout> tensor(2,3,5,7,11);
tensor.setRandom();
Tensor<float, 5, DataLayout> slice1(1,1,1,1,1);
Tensor<T, 5, DataLayout> slice1(1,1,1,1,1);
Eigen::DSizes<ptrdiff_t, 5> indices(1,2,3,4,5);
Eigen::DSizes<ptrdiff_t, 5> sizes(1,1,1,1,1);
slice1 = tensor.slice(indices, sizes);
VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
Tensor<float, 5, DataLayout> slice2(1,1,2,2,3);
Tensor<T, 5, DataLayout> slice2(1,1,2,2,3);
Eigen::DSizes<ptrdiff_t, 5> indices2(1,1,3,4,5);
Eigen::DSizes<ptrdiff_t, 5> sizes2(1,1,2,2,3);
slice2 = tensor.slice(indices2, sizes2);
@ -138,20 +138,20 @@ static void test_simple_slice()
}
}
template<typename=void>
template<typename T>
static void test_const_slice()
{
const float b[1] = {42};
TensorMap<Tensor<const float, 1> > m(b, 1);
const T b[1] = {42};
TensorMap<Tensor<const T, 1> > m(b, 1);
DSizes<DenseIndex, 1> offsets;
offsets[0] = 0;
TensorRef<Tensor<const float, 1> > slice_ref(m.slice(offsets, m.dimensions()));
TensorRef<Tensor<const T, 1> > slice_ref(m.slice(offsets, m.dimensions()));
VERIFY_IS_EQUAL(slice_ref(0), 42);
}
template<int DataLayout>
template<typename T, int DataLayout>
static void test_slice_in_expr() {
typedef Matrix<float, Dynamic, Dynamic, DataLayout> Mtx;
typedef Matrix<T, Dynamic, Dynamic, DataLayout> Mtx;
Mtx m1(7,7);
Mtx m2(3,3);
m1.setRandom();
@ -159,10 +159,10 @@ static void test_slice_in_expr() {
Mtx m3 = m1.block(1, 2, 3, 3) * m2.block(0, 2, 3, 1);
TensorMap<Tensor<float, 2, DataLayout>> tensor1(m1.data(), 7, 7);
TensorMap<Tensor<float, 2, DataLayout>> tensor2(m2.data(), 3, 3);
Tensor<float, 2, DataLayout> tensor3(3,1);
typedef Tensor<float, 1>::DimensionPair DimPair;
TensorMap<Tensor<T, 2, DataLayout>> tensor1(m1.data(), 7, 7);
TensorMap<Tensor<T, 2, DataLayout>> tensor2(m2.data(), 3, 3);
Tensor<T, 2, DataLayout> tensor3(3,1);
typedef typename Tensor<T, 1>::DimensionPair DimPair;
array<DimPair, 1> contract_along{{DimPair(1, 0)}};
Eigen::DSizes<ptrdiff_t, 2> indices1(1,2);
@ -179,28 +179,28 @@ static void test_slice_in_expr() {
}
// Take an arbitrary slice of an arbitrarily sized tensor.
TensorMap<Tensor<const float, 2, DataLayout>> tensor4(m1.data(), 7, 7);
Tensor<float, 1, DataLayout> tensor6 = tensor4.reshape(DSizes<ptrdiff_t, 1>(7*7)).exp().slice(DSizes<ptrdiff_t, 1>(0), DSizes<ptrdiff_t, 1>(35));
TensorMap<Tensor<const T, 2, DataLayout>> tensor4(m1.data(), 7, 7);
Tensor<T, 1, DataLayout> tensor6 = tensor4.reshape(DSizes<ptrdiff_t, 1>(7*7)).exp().slice(DSizes<ptrdiff_t, 1>(0), DSizes<ptrdiff_t, 1>(35));
for (int i = 0; i < 35; ++i) {
VERIFY_IS_APPROX(tensor6(i), expf(tensor4.data()[i]));
}
}
template<int DataLayout>
template<typename T, int DataLayout>
static void test_slice_as_lvalue()
{
Tensor<float, 3, DataLayout> tensor1(2,2,7);
Tensor<T, 3, DataLayout> tensor1(2,2,7);
tensor1.setRandom();
Tensor<float, 3, DataLayout> tensor2(2,2,7);
Tensor<T, 3, DataLayout> tensor2(2,2,7);
tensor2.setRandom();
Tensor<float, 3, DataLayout> tensor3(4,3,5);
Tensor<T, 3, DataLayout> tensor3(4,3,5);
tensor3.setRandom();
Tensor<float, 3, DataLayout> tensor4(4,3,2);
Tensor<T, 3, DataLayout> tensor4(4,3,2);
tensor4.setRandom();
Tensor<float, 3, DataLayout> tensor5(10,13,12);
Tensor<T, 3, DataLayout> tensor5(10,13,12);
tensor5.setRandom();
Tensor<float, 3, DataLayout> result(4,5,7);
Tensor<T, 3, DataLayout> result(4,5,7);
Eigen::DSizes<ptrdiff_t, 3> sizes12(2,2,7);
Eigen::DSizes<ptrdiff_t, 3> first_slice(0,0,0);
result.slice(first_slice, sizes12) = tensor1;
@ -246,10 +246,10 @@ static void test_slice_as_lvalue()
}
}
template<int DataLayout>
template<typename T, int DataLayout>
static void test_slice_raw_data()
{
Tensor<float, 4, DataLayout> tensor(3,5,7,11);
Tensor<T, 4, DataLayout> tensor(3,5,7,11);
tensor.setRandom();
Eigen::DSizes<ptrdiff_t, 4> offsets(1,2,3,4);
@ -276,7 +276,7 @@ static void test_slice_raw_data()
extents = Eigen::DSizes<ptrdiff_t, 4>(1,2,1,1);
auto slice3 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
VERIFY_IS_EQUAL(slice3.dimensions().TotalSize(), 2);
VERIFY_IS_EQUAL(slice3.data(), static_cast<float*>(0));
VERIFY_IS_EQUAL(slice3.data(), static_cast<T*>(0));
if (DataLayout == ColMajor) {
offsets = Eigen::DSizes<ptrdiff_t, 4>(0,2,3,4);
@ -341,15 +341,15 @@ static void test_slice_raw_data()
}
template<int DataLayout>
template<typename T, int DataLayout>
static void test_strided_slice()
{
typedef Tensor<float, 5, DataLayout> Tensor5f;
typedef Tensor<T, 5, DataLayout> Tensor5f;
typedef Eigen::DSizes<Eigen::DenseIndex, 5> Index5;
typedef Tensor<float, 2, DataLayout> Tensor2f;
typedef Tensor<T, 2, DataLayout> Tensor2f;
typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
Tensor<float, 2, DataLayout> tensor2(7,11);
Tensor<T, 5, DataLayout> tensor(2,3,5,7,11);
Tensor<T, 2, DataLayout> tensor2(7,11);
tensor.setRandom();
tensor2.setRandom();
@ -435,13 +435,13 @@ static void test_strided_slice()
}
}
template<int DataLayout>
template<typename T, int DataLayout>
static void test_strided_slice_write()
{
typedef Tensor<float, 2, DataLayout> Tensor2f;
typedef Tensor<T, 2, DataLayout> Tensor2f;
typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
Tensor<float, 2, DataLayout> tensor(7,11),tensor2(7,11);
Tensor<T, 2, DataLayout> tensor(7,11),tensor2(7,11);
tensor.setRandom();
tensor2=tensor;
Tensor2f slice(2,3);
@ -461,15 +461,14 @@ static void test_strided_slice_write()
}
}
template<int DataLayout>
template<typename T, int DataLayout>
static void test_composition()
{
Eigen::Tensor<float, 2, DataLayout> matrix(7, 11);
Eigen::Tensor<T, 2, DataLayout> matrix(7, 11);
matrix.setRandom();
const DSizes<ptrdiff_t, 3> newDims(1, 1, 11);
Eigen::Tensor<float, 3, DataLayout> tensor =
Eigen::Tensor<T, 3, DataLayout> tensor =
matrix.slice(DSizes<ptrdiff_t, 2>(2, 0), DSizes<ptrdiff_t, 2>(1, 11)).reshape(newDims);
VERIFY_IS_EQUAL(tensor.dimensions().TotalSize(), 11);
@ -481,29 +480,27 @@ static void test_composition()
}
}
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
#define CALL_SUBTESTS_TYPES_LAYOUTS(PART, NAME) \
CALL_SUBTEST_PART(PART)((NAME<float, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<float, RowMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, ColMajor>())); \
CALL_SUBTEST_PART(PART)((NAME<bool, RowMajor>()))
EIGEN_DECLARE_TEST(cxx11_tensor_morphing)
{
CALL_SUBTEST_1(test_simple_reshape<void>());
CALL_SUBTEST_1(test_static_reshape<void>());
CALL_SUBTEST_1(test_reshape_in_expr<void>());
CALL_SUBTEST_1(test_reshape_as_lvalue<void>());
CALL_SUBTEST_1(test_reshape_in_expr<void>());
CALL_SUBTEST_1(test_const_slice<float>());
CALL_SUBTEST_1(test_simple_slice<ColMajor>());
CALL_SUBTEST_1(test_simple_slice<RowMajor>());
CALL_SUBTEST_1(test_const_slice());
CALL_SUBTEST_2(test_slice_in_expr<ColMajor>());
CALL_SUBTEST_3(test_slice_in_expr<RowMajor>());
CALL_SUBTEST_4(test_slice_as_lvalue<ColMajor>());
CALL_SUBTEST_4(test_slice_as_lvalue<RowMajor>());
CALL_SUBTEST_5(test_slice_raw_data<ColMajor>());
CALL_SUBTEST_5(test_slice_raw_data<RowMajor>());
CALL_SUBTEST_6(test_strided_slice_write<ColMajor>());
CALL_SUBTEST_6(test_strided_slice<ColMajor>());
CALL_SUBTEST_6(test_strided_slice_write<RowMajor>());
CALL_SUBTEST_6(test_strided_slice<RowMajor>());
CALL_SUBTEST_7(test_composition<ColMajor>());
CALL_SUBTEST_7(test_composition<RowMajor>());
CALL_SUBTESTS_TYPES_LAYOUTS(2, test_simple_slice);
CALL_SUBTESTS_TYPES_LAYOUTS(3, test_slice_as_lvalue);
CALL_SUBTESTS_TYPES_LAYOUTS(4, test_slice_raw_data);
CALL_SUBTESTS_TYPES_LAYOUTS(5, test_strided_slice_write);
CALL_SUBTESTS_TYPES_LAYOUTS(6, test_strided_slice);
CALL_SUBTESTS_TYPES_LAYOUTS(7, test_composition);
}