diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 637aac9f9..ec61ac697 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -224,36 +224,6 @@ pabs(const unsigned long long& a) { return a; } template EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) { using numext::arg; return arg(a); } -/** \internal \returns the bitwise and of \a a and \a b */ -template EIGEN_DEVICE_FUNC inline Packet -pand(const Packet& a, const Packet& b) { return a & b; } - -/** \internal \returns the bitwise or of \a a and \a b */ -template EIGEN_DEVICE_FUNC inline Packet -por(const Packet& a, const Packet& b) { return a | b; } - -/** \internal \returns the bitwise xor of \a a and \a b */ -template EIGEN_DEVICE_FUNC inline Packet -pxor(const Packet& a, const Packet& b) { return a ^ b; } - -/** \internal \returns the bitwise andnot of \a a and \a b */ -template EIGEN_DEVICE_FUNC inline Packet -pandnot(const Packet& a, const Packet& b) { return a & (~b); } - -/** \internal \returns ones */ -template EIGEN_DEVICE_FUNC inline Packet -ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;} - -template -EIGEN_DEVICE_FUNC inline std::complex ptrue(const std::complex& /*a*/) { - RealScalar b; - b = ptrue(b); - return std::complex(b, b); -} - -/** \internal \returns the bitwise not of \a a */ -template EIGEN_DEVICE_FUNC inline Packet -pnot(const Packet& a) { return pxor(ptrue(a), a);} /** \internal \returns \a a logically shifted by N bits to the right */ template EIGEN_DEVICE_FUNC inline int @@ -294,6 +264,35 @@ pldexp(const Packet &a, const Packet &exponent) { return ldexp(a, static_cast(exponent)); } +// Notice: The following ops accept and operator on bitwise masks. +// The value of each field in a masks is Scalar(0) or ~Scalar(0). +// For boolean packet like Packet16b, this is different from the +// representation of true and false, which are 1 and 0. +// As an example +// ptrue() = 0xffffffffffffffffffffffffffffffff +// while +// pset1(true) = 0x01010101010101010101010101010101 + +/** \internal \returns the bitwise and of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pand(const Packet& a, const Packet& b) { return a & b; } + +/** \internal \returns the bitwise or of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +por(const Packet& a, const Packet& b) { return a | b; } + +/** \internal \returns the bitwise xor of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pxor(const Packet& a, const Packet& b) { return a ^ b; } + +/** \internal \returns the bitwise and of \a a and not \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pandnot(const Packet& a, const Packet& b) { return a & (~b); } + +/** \internal \returns ones */ +template EIGEN_DEVICE_FUNC inline Packet +ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;} + /** \internal \returns zeros */ template EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) { return pxor(a,a); } @@ -308,21 +307,16 @@ template<> EIGEN_DEVICE_FUNC inline double pzero(const double& a) { return 0.; } -/** \internal \returns bits of \a or \b according to the input bit mask \a mask */ -template EIGEN_DEVICE_FUNC inline Packet -pselect(const Packet& mask, const Packet& a, const Packet& b) { - return por(pand(a,mask),pandnot(b,mask)); +template +EIGEN_DEVICE_FUNC inline std::complex ptrue(const std::complex& /*a*/) { + RealScalar b; + b = ptrue(b); + return std::complex(b, b); } -template<> EIGEN_DEVICE_FUNC inline float pselect( - const float& mask, const float& a, const float&b) { - return numext::equal_strict(mask,0.f) ? b : a; -} - -template<> EIGEN_DEVICE_FUNC inline double pselect( - const double& mask, const double& a, const double& b) { - return numext::equal_strict(mask,0.) ? b : a; -} +/** \internal \returns the bitwise not of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +pnot(const Packet& a) { return pxor(ptrue(a), a);} /** \internal \returns a <= b as a bit mask */ template EIGEN_DEVICE_FUNC inline Packet @@ -340,6 +334,24 @@ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); } template EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) { return pnot(pcmp_le(b,a)); } +/** \internal \returns \a or \b for each field in packet according to \mask */ +template EIGEN_DEVICE_FUNC inline Packet +pselect(const Packet& mask, const Packet& a, const Packet& b) { + return por(pand(a,mask),pandnot(b,mask)); +} + +template<> EIGEN_DEVICE_FUNC inline float pselect( + const float& cond, const float& a, const float&b) { + return numext::equal_strict(cond,0.f) ? b : a; +} + +template<> EIGEN_DEVICE_FUNC inline double pselect( + const double& cond, const double& a, const double& b) { + return numext::equal_strict(cond,0.) ? b : a; +} + + + /** \internal \returns the min of \a a and \a b (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 6a4dd5253..a56ae9dde 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -44,10 +44,12 @@ typedef __m128d Packet2d; #endif typedef eigen_packet_wrapper<__m128i, 0> Packet4i; +typedef eigen_packet_wrapper<__m128i, 1> Packet16b; template<> struct is_arithmetic<__m128> { enum { value = true }; }; template<> struct is_arithmetic<__m128i> { enum { value = true }; }; template<> struct is_arithmetic<__m128d> { enum { value = true }; }; +template<> struct is_arithmetic { enum { value = true }; }; #define EIGEN_SSE_SHUFFLE_MASK(p,q,r,s) ((s)<<6|(r)<<4|(q)<<2|(p)) @@ -158,6 +160,30 @@ template<> struct packet_traits : default_packet_traits }; }; +template<> struct packet_traits : default_packet_traits +{ + typedef Packet16b type; + typedef Packet16b half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + HasHalfPacket = 0, + size=16, + + HasAdd = 0, + HasSub = 0, + HasShift = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasReduxp = 0 + }; +}; + template<> struct unpacket_traits { typedef float type; typedef Packet4f half; @@ -174,6 +200,11 @@ template<> struct unpacket_traits { typedef Packet4i half; enum {size=4, alignment=Aligned16, vectorizable=false, masked_load_available=false, masked_store_available=false}; }; +template<> struct unpacket_traits { + typedef bool type; + typedef Packet16b half; + enum {size=16, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; +}; #ifndef EIGEN_VECTORIZE_AVX template<> struct scalar_div_cost { enum { value = 7 }; }; @@ -192,6 +223,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { re template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return _mm_set1_pd(from); } template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return _mm_set1_epi32(from); } #endif +template<> EIGEN_STRONG_INLINE Packet16b pset1(const bool& from) { return _mm_set1_epi8(static_cast(from)); } template<> EIGEN_STRONG_INLINE Packet4f pset1frombits(unsigned int from) { return _mm_castsi128_ps(pset1(from)); } @@ -385,8 +417,11 @@ template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2 template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a,b); } +template<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { return _mm_cmpeq_epi8(a,b); } + template<> EIGEN_STRONG_INLINE Packet4i ptrue(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); } +template<> EIGEN_STRONG_INLINE Packet16b ptrue(const Packet16b& a) { return _mm_cmpeq_epi32(a, a); } template<> EIGEN_STRONG_INLINE Packet4f ptrue(const Packet4f& a) { Packet4i b = _mm_castps_si128(a); @@ -398,17 +433,21 @@ ptrue(const Packet2d& a) { return _mm_castsi128_pd(_mm_cmpeq_epi32(b, b)); } + template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); } +template<> EIGEN_STRONG_INLINE Packet16b pand(const Packet16b& a, const Packet16b& b) { return _mm_and_si128(a,b); } template<> EIGEN_STRONG_INLINE Packet4f por(const Packet4f& a, const Packet4f& b) { return _mm_or_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d por(const Packet2d& a, const Packet2d& b) { return _mm_or_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i por(const Packet4i& a, const Packet4i& b) { return _mm_or_si128(a,b); } +template<> EIGEN_STRONG_INLINE Packet16b por(const Packet16b& a, const Packet16b& b) { return _mm_or_si128(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pxor(const Packet4f& a, const Packet4f& b) { return _mm_xor_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pxor(const Packet2d& a, const Packet2d& b) { return _mm_xor_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const Packet4i& b) { return _mm_xor_si128(a,b); } +template<> EIGEN_STRONG_INLINE Packet16b pxor(const Packet16b& a, const Packet16b& b) { return _mm_xor_si128(a,b); } template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f& b) { return _mm_andnot_ps(b,a); } template<> EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); } @@ -471,6 +510,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_ps(from); } template<> EIGEN_STRONG_INLINE Packet2d pload(const double* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_pd(from); } template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast(from)); } +template<> EIGEN_STRONG_INLINE Packet16b pload(const bool* from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast(from)); } #if EIGEN_COMP_MSVC template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { @@ -505,6 +545,11 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast(from)); } +template<> EIGEN_STRONG_INLINE Packet16b ploadu(const bool* from) { + EIGEN_DEBUG_UNALIGNED_LOAD + return _mm_loadu_si128(reinterpret_cast(from)); +} + template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) { @@ -522,10 +567,12 @@ template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(to, from); } template<> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd(to, from); } template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); } +template<> EIGEN_STRONG_INLINE void pstore(bool* to, const Packet16b& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_si128(reinterpret_cast<__m128i*>(to), from); } template<> EIGEN_STRONG_INLINE void pstoreu(double* to, const Packet2d& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } +template<> EIGEN_STRONG_INLINE void pstoreu(bool* to, const Packet16b& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } template<> EIGEN_DEVICE_FUNC inline Packet4f pgather(const float* from, Index stride) { diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 0ea40bab0..a2bc58c76 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -382,11 +382,14 @@ struct functor_traits > { struct scalar_boolean_and_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pand(a,b); } }; template<> struct functor_traits { enum { Cost = NumTraits::AddCost, - PacketAccess = false + PacketAccess = true }; }; @@ -398,11 +401,14 @@ template<> struct functor_traits { struct scalar_boolean_or_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::por(a,b); } }; template<> struct functor_traits { enum { Cost = NumTraits::AddCost, - PacketAccess = false + PacketAccess = true }; }; @@ -414,11 +420,14 @@ template<> struct functor_traits { struct scalar_boolean_xor_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_xor_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a ^ b; } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const + { return internal::pxor(a,b); } }; template<> struct functor_traits { enum { Cost = NumTraits::AddCost, - PacketAccess = false + PacketAccess = true }; }; diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 5d38ce6b4..761273b86 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -70,6 +70,23 @@ void test_cast() { test_cast_helper::run(); } +template void packetmath_boolean() +{ + const int PacketSize = internal::unpacket_traits::size; + const int size = 2*PacketSize; + EIGEN_ALIGN_MAX Scalar data1[size]; + EIGEN_ALIGN_MAX Scalar data2[size]; + EIGEN_ALIGN_MAX Scalar ref[size]; + + for (int i=0; i(); + } + CHECK_CWISE2_IF(true, internal::por, internal::por); + CHECK_CWISE2_IF(true, internal::pxor, internal::pxor); + CHECK_CWISE2_IF(true, internal::pand, internal::pand); +} + template void packetmath() { typedef internal::packet_traits PacketTraits; @@ -337,21 +354,6 @@ template void packetmath() VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pinsertlast"); } - { - for (int i=0; i(); - unsigned char v = internal::random() ? 0xff : 0; - char* bytes = (char*)(data1+PacketSize+i); - for(int k=0; k void packetmath() } CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt); + + for (int i=0; i(); + } + CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot); + + packetmath_boolean(); } + template void packetmath_real() { typedef internal::packet_traits PacketTraits; @@ -807,6 +818,9 @@ EIGEN_DECLARE_TEST(packetmath) CALL_SUBTEST_11( test::runner >::run() ); CALL_SUBTEST_12( test::runner >::run() ); CALL_SUBTEST_13(( packetmath::type>() )); +#ifdef EIGEN_PACKET_MATH_SSE_H + CALL_SUBTEST_14(( packetmath_boolean::type>() )); +#endif g_first_pass = false; } } diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp index 30924b6b6..d56da28d8 100644 --- a/unsupported/test/cxx11_tensor_expr.cpp +++ b/unsupported/test/cxx11_tensor_expr.cpp @@ -7,6 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#include + #include "main.h" #include @@ -193,8 +195,8 @@ static void test_constants() static void test_boolean() { - Tensor vec(6); - std::copy_n(std::begin({0, 1, 2, 3, 4, 5}), 6, vec.data()); + Tensor vec(31); + std::iota(vec.data(), vec.data() + 31, 0); // Test ||. Tensor bool1 = vec < vec.constant(1) || vec > vec.constant(4);