diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 32cedd0b1..a734d99b7 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -215,19 +215,166 @@ pmul(const bool& a, const bool& b) { return a && b; } template EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) { return a/b; } +/** \internal \returns one bits */ +template EIGEN_DEVICE_FUNC inline Packet +ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;} + +/** \internal \returns zero bits */ +template EIGEN_DEVICE_FUNC inline Packet +pzero(const Packet& /*a*/) { Packet b; memset((void*)&b, 0, sizeof(b)); return b;} + +/** \internal \returns a <= b as a bit mask */ +template EIGEN_DEVICE_FUNC inline Packet +pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); } + +/** \internal \returns a < b as a bit mask */ +template EIGEN_DEVICE_FUNC inline Packet +pcmp_lt(const Packet& a, const Packet& b) { return a EIGEN_DEVICE_FUNC inline Packet +pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); } + +/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ +template EIGEN_DEVICE_FUNC inline Packet +pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); } +template<> EIGEN_DEVICE_FUNC inline float pzero(const float& a) { + EIGEN_UNUSED_VARIABLE(a) + return 0.f; +} + +template<> EIGEN_DEVICE_FUNC inline double pzero(const double& a) { + EIGEN_UNUSED_VARIABLE(a) + return 0.; +} + +template +EIGEN_DEVICE_FUNC inline std::complex ptrue(const std::complex& /*a*/) { + RealScalar b; + b = ptrue(b); + return std::complex(b, b); +} + +template +EIGEN_DEVICE_FUNC inline Packet bitwise_helper(const Packet& a, const Packet& b, Op op) { + const unsigned char* a_ptr = reinterpret_cast(&a); + const unsigned char* b_ptr = reinterpret_cast(&b); + Packet c; + unsigned char* c_ptr = reinterpret_cast(&c); + for (size_t i = 0; i < sizeof(Packet); ++i) { + *c_ptr++ = op(*a_ptr++, *b_ptr++); + } + return c; +} + +/** \internal \returns the bitwise and of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pand(const Packet& a, const Packet& b) { + EIGEN_USING_STD(bit_and); + return bitwise_helper(a ,b, bit_and()); +} + +/** \internal \returns the bitwise or of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +por(const Packet& a, const Packet& b) { + EIGEN_USING_STD(bit_or); + return bitwise_helper(a ,b, bit_or()); +} + +/** \internal \returns the bitwise xor of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pxor(const Packet& a, const Packet& b) { + EIGEN_USING_STD(bit_xor); + return bitwise_helper(a ,b, bit_xor()); +} + +/** \internal \returns the bitwise and of \a a and not \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pandnot(const Packet& a, const Packet& b) { return pand(a, pxor(ptrue(b), b)); } + +/** \internal \returns \a or \b for each field in packet according to \mask */ +template EIGEN_DEVICE_FUNC inline Packet +pselect(const Packet& mask, const Packet& a, const Packet& b) { + return por(pand(a,mask),pandnot(b,mask)); +} + +template<> EIGEN_DEVICE_FUNC inline float pselect( + const float& cond, const float& a, const float&b) { + return numext::equal_strict(cond,0.f) ? b : a; +} + +template<> EIGEN_DEVICE_FUNC inline double pselect( + const double& cond, const double& a, const double& b) { + return numext::equal_strict(cond,0.) ? b : a; +} + +template<> EIGEN_DEVICE_FUNC inline bool pselect( + const bool& cond, const bool& a, const bool& b) { + return cond ? a : b; +} + +/** \internal \returns the min or of \a a and \a b (coeff-wise) + If either \a a or \a b are NaN, the result is implementation defined. */ +template +struct pminmax_impl { + template + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { + return op(a,b); + } +}; + +/** \internal \returns the min or max of \a a and \a b (coeff-wise) + If either \a a or \a b are NaN, NaN is returned. */ +template<> +struct pminmax_impl { + template + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { + Packet not_nan_mask_a = pcmp_eq(a, a); + Packet not_nan_mask_b = pcmp_eq(b, b); + return pselect(not_nan_mask_a, + pselect(not_nan_mask_b, op(a, b), b), + a); + } +}; + +/** \internal \returns the min or max of \a a and \a b (coeff-wise) + If both \a a and \a b are NaN, NaN is returned. + Equivalent to std::fmin(a, b). */ +template<> +struct pminmax_impl { + template + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { + Packet not_nan_mask_a = pcmp_eq(a, a); + Packet not_nan_mask_b = pcmp_eq(b, b); + return pselect(not_nan_mask_a, + pselect(not_nan_mask_b, op(a, b), a), + b); + } +}; + /** \internal \returns the min of \a a and \a b (coeff-wise). If \a a or \b b is NaN, the return value is implementation defined. */ template EIGEN_DEVICE_FUNC inline Packet -pmin(const Packet& a, const Packet& b) { return numext::mini(a, b); } +pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); } + +/** \internal \returns the min of \a a and \a b (coeff-wise). + NaNPropagation determines the NaN propagation semantics. */ +template EIGEN_DEVICE_FUNC inline Packet +pmin(const Packet& a, const Packet& b) { return pminmax_impl::run(a,b, pmin); } /** \internal \returns the max of \a a and \a b (coeff-wise) If \a a or \b b is NaN, the return value is implementation defined. */ template EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); } +/** \internal \returns the max of \a a and \a b (coeff-wise). + NaNPropagation determines the NaN propagation semantics. */ +template EIGEN_DEVICE_FUNC inline Packet +pmax(const Packet& a, const Packet& b) { return pminmax_impl::run(a,b, pmax); } + /** \internal \returns the absolute value of \a a */ template EIGEN_DEVICE_FUNC inline Packet -pabs(const Packet& a) { using std::abs; return abs(a); } +pabs(const Packet& a) { return numext::abs(a); } template<> EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) { return a; } template<> EIGEN_DEVICE_FUNC inline unsigned long @@ -279,105 +426,6 @@ pldexp(const Packet &a, const Packet &exponent) { return ldexp(a, static_cast(exponent)); } -/** \internal \returns zero bits */ -template EIGEN_DEVICE_FUNC inline Packet -pzero(const Packet& /*a*/) { Packet b; memset((void*)&b, 0, sizeof(b)); return b;} - -template<> EIGEN_DEVICE_FUNC inline float pzero(const float& a) { - EIGEN_UNUSED_VARIABLE(a) - return 0.f; -} - -template<> EIGEN_DEVICE_FUNC inline double pzero(const double& a) { - EIGEN_UNUSED_VARIABLE(a) - return 0.; -} - -/** \internal \returns one bits */ -template EIGEN_DEVICE_FUNC inline Packet -ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;} - -template -EIGEN_DEVICE_FUNC inline std::complex ptrue(const std::complex& /*a*/) { - RealScalar b; - b = ptrue(b); - return std::complex(b, b); -} - -template -EIGEN_DEVICE_FUNC inline Packet bitwise_helper(const Packet& a, const Packet& b, Op op) { - const unsigned char* a_ptr = reinterpret_cast(&a); - const unsigned char* b_ptr = reinterpret_cast(&b); - Packet c; - unsigned char* c_ptr = reinterpret_cast(&c); - for (size_t i = 0; i < sizeof(Packet); ++i) { - *c_ptr++ = op(*a_ptr++, *b_ptr++); - } - return c; -} - -/** \internal \returns the bitwise and of \a a and \a b */ -template EIGEN_DEVICE_FUNC inline Packet -pand(const Packet& a, const Packet& b) { - EIGEN_USING_STD(bit_and); - return bitwise_helper(a ,b, bit_and()); -} - -/** \internal \returns the bitwise or of \a a and \a b */ -template EIGEN_DEVICE_FUNC inline Packet -por(const Packet& a, const Packet& b) { - EIGEN_USING_STD(bit_or); - return bitwise_helper(a ,b, bit_or()); -} - -/** \internal \returns the bitwise xor of \a a and \a b */ -template EIGEN_DEVICE_FUNC inline Packet -pxor(const Packet& a, const Packet& b) { - EIGEN_USING_STD(bit_xor); - return bitwise_helper(a ,b, bit_xor()); -} - -/** \internal \returns the bitwise and of \a a and not \a b */ -template EIGEN_DEVICE_FUNC inline Packet -pandnot(const Packet& a, const Packet& b) { return pand(a, pxor(ptrue(b), b)); } - -/** \internal \returns a <= b as a bit mask */ -template EIGEN_DEVICE_FUNC inline Packet -pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); } - -/** \internal \returns a < b as a bit mask */ -template EIGEN_DEVICE_FUNC inline Packet -pcmp_lt(const Packet& a, const Packet& b) { return a EIGEN_DEVICE_FUNC inline Packet -pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); } - -/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ -template EIGEN_DEVICE_FUNC inline Packet -pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); } - -/** \internal \returns \a or \b for each field in packet according to \mask */ -template EIGEN_DEVICE_FUNC inline Packet -pselect(const Packet& mask, const Packet& a, const Packet& b) { - return por(pand(a,mask),pandnot(b,mask)); -} - -template<> EIGEN_DEVICE_FUNC inline float pselect( - const float& cond, const float& a, const float&b) { - return numext::equal_strict(cond,0.f) ? b : a; -} - -template<> EIGEN_DEVICE_FUNC inline double pselect( - const double& cond, const double& a, const double& b) { - return numext::equal_strict(cond,0.) ? b : a; -} - -template<> EIGEN_DEVICE_FUNC inline bool pselect( - const bool& cond, const bool& a, const bool& b) { - return cond ? a : b; -} - /** \internal \returns the min of \a a and \a b (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); } @@ -507,57 +555,6 @@ template EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* a #endif } -/** \internal \returns the first element of a packet */ -template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type pfirst(const Packet& a) -{ return a; } - -/** \internal \returns the sum of the elements of \a a*/ -template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux(const Packet& a) -{ return a; } - -/** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4. - * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} - * For packet-size smaller or equal to 4, this boils down to a noop. - */ -template EIGEN_DEVICE_FUNC inline -typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type -predux_half_dowto4(const Packet& a) -{ return a; } - -/** \internal \returns the product of the elements of \a a */ -template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul(const Packet& a) -{ return a; } - -/** \internal \returns the min of the elements of \a a */ -template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) -{ return a; } - -/** \internal \returns the max of the elements of \a a */ -template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) -{ return a; } - -/** \internal \returns true if all coeffs of \a a means "true" - * It is supposed to be called on values returned by pcmp_*. - */ -// not needed yet -// template EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a) -// { return bool(a); } - -/** \internal \returns true if any coeffs of \a a means "true" - * It is supposed to be called on values returned by pcmp_*. - */ -template EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) -{ - // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames. - // It is expected that "true" is either: - // - Scalar(1) - // - bits full of ones (NaN for floats), - // - or first bit equals to 1 (1 for ints, smallest denormal for floats). - // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars. - typedef typename unpacket_traits::type Scalar; - return numext::not_equal_strict(predux(a), Scalar(0)); -} - /** \internal \returns the reversed elements of \a a*/ template EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) { return a; } @@ -656,53 +653,104 @@ Packet print(const Packet& a) { using numext::rint; return rint(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } +/** \internal \returns the first element of a packet */ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type +pfirst(const Packet& a) +{ return a; } -/** \internal \returns the max of \a a and \a b (coeff-wise) - If both \a a and \a b are NaN, NaN is returned. - Equivalent to std::fmax(a, b). */ -template EIGEN_DEVICE_FUNC inline Packet -pfmax(const Packet& a, const Packet& b) { - Packet not_nan_mask_a = pcmp_eq(a, a); - Packet not_nan_mask_b = pcmp_eq(b, b); - return pselect(not_nan_mask_a, - pselect(not_nan_mask_b, pmax(a, b), a), - b); +/** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4. + * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} + * For packet-size smaller or equal to 4, this boils down to a noop. + */ +template +EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type +predux_half_dowto4(const Packet& a) +{ return a; } + +// Slow generic implementation of Packet reduction. +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type +predux_helper(const Packet& a, Op op) { + typedef typename unpacket_traits::type Scalar; + const size_t n = unpacket_traits::size; + Scalar elements[n]; + pstoreu(elements, a); + for(size_t k = n / 2; k > 0; k /= 2) { + for(size_t i = 0; i < k; ++i) { + elements[i] = op(elements[i], elements[i + k]); + } + } + return elements[0]; } -/** \internal \returns the min of \a a and \a b (coeff-wise) - If both \a a and \a b are NaN, NaN is returned. - Equivalent to std::fmin(a, b). */ -template EIGEN_DEVICE_FUNC inline Packet -pfmin(const Packet& a, const Packet& b) { - Packet not_nan_mask_a = pcmp_eq(a, a); - Packet not_nan_mask_b = pcmp_eq(b, b); - return pselect(not_nan_mask_a, - pselect(not_nan_mask_b, pmin(a, b), a), - b); +/** \internal \returns the sum of the elements of \a a*/ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type +predux(const Packet& a) +{ + return predux_helper(a, padd::type>); } -/** \internal \returns the max of \a a and \a b (coeff-wise) - If either \a a or \a b are NaN, NaN is returned. */ -template EIGEN_DEVICE_FUNC inline Packet -pfmax_nan(const Packet& a, const Packet& b) { - Packet not_nan_mask_a = pcmp_eq(a, a); - Packet not_nan_mask_b = pcmp_eq(b, b); - return pselect(not_nan_mask_a, - pselect(not_nan_mask_b, pmax(a, b), b), - a); +/** \internal \returns the product of the elements of \a a */ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type +predux_mul(const Packet& a) +{ + return predux_helper(a, pmul::type>); } -/** \internal \returns the min of \a a and \a b (coeff-wise) - If either \a a or \a b are NaN, NaN is returned. */ -template EIGEN_DEVICE_FUNC inline Packet -pfmin_nan(const Packet& a, const Packet& b) { - Packet not_nan_mask_a = pcmp_eq(a, a); - Packet not_nan_mask_b = pcmp_eq(b, b); - return pselect(not_nan_mask_a, - pselect(not_nan_mask_b, pmin(a, b), b), - a); +/** \internal \returns the min of the elements of \a a */ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type +predux_min(const Packet& a) +{ + return predux_helper(a, pmin::type>); } +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type +predux_min(const Packet& a) +{ + return predux_helper(a, pmin::type>); +} + +/** \internal \returns the max of the elements of \a a */ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type +predux_max(const Packet& a) +{ + return predux_helper(a, pmax::type>); +} + +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type +predux_max(const Packet& a) +{ + return predux_helper(a, pmax::type>); +} + +/** \internal \returns true if all coeffs of \a a means "true" + * It is supposed to be called on values returned by pcmp_*. + */ +// not needed yet +// template EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a) +// { return bool(a); } + +/** \internal \returns true if any coeffs of \a a means "true" + * It is supposed to be called on values returned by pcmp_*. + */ +template EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) +{ + // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames. + // It is expected that "true" is either: + // - Scalar(1) + // - bits full of ones (NaN for floats), + // - or first bit equals to 1 (1 for ints, smallest denormal for floats). + // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars. + typedef typename unpacket_traits::type Scalar; + return numext::not_equal_strict(predux(a), Scalar(0)); +} /*************************************************************************** * The following functions might not have to be overwritten for vectorized types diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 55650bb8d..f3509c4b9 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -140,29 +140,18 @@ struct scalar_min_op : binary_op_base typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { - if (NaNPropagation == PropagateFast) { - return numext::mini(a, b); - } else if (NaNPropagation == PropagateNumbers) { - return internal::pfmin(a,b); - } else if (NaNPropagation == PropagateNaN) { - return internal::pfmin_nan(a,b); - } + return internal::pmin(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const { - if (NaNPropagation == PropagateFast) { - return internal::pmin(a,b); - } else if (NaNPropagation == PropagateNumbers) { - return internal::pfmin(a,b); - } else if (NaNPropagation == PropagateNaN) { - return internal::pfmin_nan(a,b); - } + return internal::pmin(a,b); } - // TODO(rmlarsen): Handle all NaN propagation semantics reductions. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const - { return internal::predux_min(a); } + { + return internal::predux_min(a); + } }; template @@ -184,29 +173,18 @@ struct scalar_max_op : binary_op_base typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const LhsScalar& a, const RhsScalar& b) const { - if (NaNPropagation == PropagateFast) { - return numext::maxi(a, b); - } else if (NaNPropagation == PropagateNumbers) { - return internal::pfmax(a,b); - } else if (NaNPropagation == PropagateNaN) { - return internal::pfmax_nan(a,b); - } + return internal::pmax(a,b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) const { - if (NaNPropagation == PropagateFast) { - return internal::pmax(a,b); - } else if (NaNPropagation == PropagateNumbers) { - return internal::pfmax(a,b); - } else if (NaNPropagation == PropagateNaN) { - return internal::pfmax_nan(a,b); - } + return internal::pmax(a,b); } - // TODO(rmlarsen): Handle all NaN propagation semantics reductions. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type predux(const Packet& a) const - { return internal::predux_max(a); } + { + return internal::predux_max(a); + } }; template diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 53c41c967..c6a1648ba 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -801,10 +801,6 @@ void packetmath_notcomplex() { Array::Map(data1, PacketSize * 4).setRandom(); - ref[0] = data1[0]; - for (int i = 0; i < PacketSize; ++i) ref[0] = (std::min)(ref[0], data1[i]); - VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload(data1))) && "internal::predux_min"); - VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin); VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax); @@ -817,13 +813,16 @@ void packetmath_notcomplex() { using ::fmin; using ::fmax; #endif - CHECK_CWISE2_IF(PacketTraits::HasMin, fmin, internal::pfmin); - CHECK_CWISE2_IF(PacketTraits::HasMax, fmax, internal::pfmax); + CHECK_CWISE2_IF(PacketTraits::HasMin, fmin, (internal::pmin)); + CHECK_CWISE2_IF(PacketTraits::HasMax, fmax, internal::pmax); CHECK_CWISE1(numext::abs, internal::pabs); CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff); ref[0] = data1[0]; - for (int i = 0; i < PacketSize; ++i) ref[0] = (std::max)(ref[0], data1[i]); + for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmin(ref[0], data1[i]); + VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload(data1))) && "internal::predux_min"); + ref[0] = data1[0]; + for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmax(ref[0], data1[i]); VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload(data1))) && "internal::predux_max"); for (int i = 0; i < PacketSize; ++i) ref[i] = data1[0] + Scalar(i); @@ -852,16 +851,47 @@ void packetmath_notcomplex() { } } - for (int i = 0; i < PacketSize; ++i) { - data1[i] = internal::random() ? std::numeric_limits::quiet_NaN() : Scalar(0); - data1[i + PacketSize] = internal::random() ? std::numeric_limits::quiet_NaN() : Scalar(0); + + // Test NaN propagation. + if (!NumTraits::IsInteger) { + // Test reductions with no NaNs. + ref[0] = data1[0]; + for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmin(ref[0], data1[i]); + VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload(data1))) && "internal::predux_min"); + ref[0] = data1[0]; + for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmin(ref[0], data1[i]); + VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload(data1))) && "internal::predux_min"); + ref[0] = data1[0]; + for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmax(ref[0], data1[i]); + VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload(data1))) && "internal::predux_max"); + ref[0] = data1[0]; + for (int i = 0; i < PacketSize; ++i) ref[0] = internal::pmax(ref[0], data1[i]); + VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload(data1))) && "internal::predux_max"); + // A single NaN. + const size_t index = std::numeric_limits::quiet_NaN() % PacketSize; + data1[index] = std::numeric_limits::quiet_NaN(); + VERIFY(PacketSize==1 || !(numext::isnan)(internal::predux_min(internal::pload(data1)))); + VERIFY((numext::isnan)(internal::predux_min(internal::pload(data1)))); + VERIFY(PacketSize==1 || !(numext::isnan)(internal::predux_max(internal::pload(data1)))); + VERIFY((numext::isnan)(internal::predux_max(internal::pload(data1)))); + // All NaNs. + for (int i = 0; i < 4 * PacketSize; ++i) data1[i] = std::numeric_limits::quiet_NaN(); + VERIFY((numext::isnan)(internal::predux_min(internal::pload(data1)))); + VERIFY((numext::isnan)(internal::predux_min(internal::pload(data1)))); + VERIFY((numext::isnan)(internal::predux_max(internal::pload(data1)))); + VERIFY((numext::isnan)(internal::predux_max(internal::pload(data1)))); + + // Test NaN propagation for coefficient-wise min and max. + for (int i = 0; i < PacketSize; ++i) { + data1[i] = internal::random() ? std::numeric_limits::quiet_NaN() : Scalar(0); + data1[i + PacketSize] = internal::random() ? std::numeric_limits::quiet_NaN() : Scalar(0); + } + // Note: NaN propagation is implementation defined for pmin/pmax, so we do not test it here. + CHECK_CWISE2_IF(PacketTraits::HasMin, fmin, (internal::pmin)); + CHECK_CWISE2_IF(PacketTraits::HasMax, fmax, internal::pmax); + CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_nan_min, (internal::pmin)); + CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_nan_max, internal::pmax); } - // Test NaN propagation for pfmin and pfmax. It should be equivalent to std::fmin. - // Note: NaN propagation is implementation defined for pmin/pmax, so we do not test it here. - CHECK_CWISE2_IF(PacketTraits::HasMin, fmin, internal::pfmin); - CHECK_CWISE2_IF(PacketTraits::HasMax, fmax, internal::pfmax); - CHECK_CWISE2_IF(PacketTraits::HasMin, propagate_nan_min, internal::pfmin_nan); - CHECK_CWISE2_IF(PacketTraits::HasMax, propagate_nan_max, internal::pfmax_nan); } template <> diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index ef332dd19..3a70d8517 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -682,28 +682,30 @@ class TensorBase return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::ProdReducer()); } - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp, const Dims, const Derived> + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> maximum(const Dims& dims) const { - return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MaxReducer()); + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MaxReducer()); } - const TensorReductionOp, const DimensionList, const Derived> + template + const TensorReductionOp, const DimensionList, const Derived> maximum() const { DimensionList in_dims; - return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MaxReducer()); + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MaxReducer()); } - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorReductionOp, const Dims, const Derived> + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> minimum(const Dims& dims) const { - return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MinReducer()); + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MinReducer()); } - const TensorReductionOp, const DimensionList, const Derived> + template + const TensorReductionOp, const DimensionList, const Derived> minimum() const { DimensionList in_dims; - return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 2edc45f1a..fd8fa00fa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -192,17 +192,19 @@ struct MinMaxBottomValue { }; -template struct MaxReducer +template struct MaxReducer { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - if (t > *accum) { *accum = t; } + scalar_max_op op; + *accum = op(t, *accum); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = pmax(*accum, p); + scalar_max_op op; + (*accum) = op.packetOp(*accum, p); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return MinMaxBottomValue::IsInteger>::bottom_value(); + return MinMaxBottomValue::IsInteger>::bottom_value(); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { @@ -217,32 +219,34 @@ template struct MaxReducer } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return numext::maxi(saccum, predux_max(vaccum)); + scalar_max_op op; + return op(saccum, op.predux(vaccum)); } }; -template -struct reducer_traits, Device> { +template + struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, PacketAccess = PacketType::HasMax, IsStateful = false, - IsExactlyAssociative = true + IsExactlyAssociative = (NaNPropagation!=PropagateFast) }; }; - -template struct MinReducer +template struct MinReducer { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - if (t < *accum) { *accum = t; } + scalar_min_op op; + *accum = op(t, *accum); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { - (*accum) = pmin(*accum, p); + scalar_min_op op; + (*accum) = op.packetOp(*accum, p); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return MinMaxBottomValue::IsInteger>::bottom_value(); + return MinMaxBottomValue::IsInteger>::bottom_value(); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { @@ -257,21 +261,21 @@ template struct MinReducer } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return numext::mini(saccum, predux_min(vaccum)); + scalar_min_op op; + return op(saccum, op.predux(vaccum)); } }; -template -struct reducer_traits, Device> { +template + struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, PacketAccess = PacketType::HasMin, IsStateful = false, - IsExactlyAssociative = true + IsExactlyAssociative = (NaNPropagation!=PropagateFast) }; }; - template struct ProdReducer { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { @@ -282,7 +286,6 @@ template struct ProdReducer EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { (*accum) = pmul(*accum, p); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { internal::scalar_cast_op conv; return conv(1); diff --git a/unsupported/test/cxx11_tensor_expr.cpp b/unsupported/test/cxx11_tensor_expr.cpp index 7fac3b4ed..556d01d4d 100644 --- a/unsupported/test/cxx11_tensor_expr.cpp +++ b/unsupported/test/cxx11_tensor_expr.cpp @@ -302,12 +302,17 @@ static void test_select() template void test_minmax_nan_propagation_templ() { for (int size = 1; size < 17; ++size) { - const Scalar kNan = std::numeric_limits::quiet_NaN(); + std::cout << "size = " << size << std::endl; + const Scalar kNaN = std::numeric_limits::quiet_NaN(); + const Scalar kInf = std::numeric_limits::infinity(); const Scalar kZero(0); - Tensor vec_nan(size); + Tensor vec_all_nan(size); + Tensor vec_one_nan(size); Tensor vec_zero(size); - vec_nan.setConstant(kNan); + vec_all_nan.setConstant(kNaN); vec_zero.setZero(); + vec_one_nan.setZero(); + vec_one_nan(size/2) = kNaN; auto verify_all_nan = [&](const Tensor& v) { for (int i = 0; i < size; ++i) { @@ -326,12 +331,12 @@ void test_minmax_nan_propagation_templ() { // max(nan, 0) = nan // max(0, nan) = nan // max(0, 0) = 0 - verify_all_nan(vec_nan.template cwiseMax(kNan)); - verify_all_nan(vec_nan.template cwiseMax(vec_nan)); - verify_all_nan(vec_nan.template cwiseMax(kZero)); - verify_all_nan(vec_nan.template cwiseMax(vec_zero)); - verify_all_nan(vec_zero.template cwiseMax(kNan)); - verify_all_nan(vec_zero.template cwiseMax(vec_nan)); + verify_all_nan(vec_all_nan.template cwiseMax(kNaN)); + verify_all_nan(vec_all_nan.template cwiseMax(vec_all_nan)); + verify_all_nan(vec_all_nan.template cwiseMax(kZero)); + verify_all_nan(vec_all_nan.template cwiseMax(vec_zero)); + verify_all_nan(vec_zero.template cwiseMax(kNaN)); + verify_all_nan(vec_zero.template cwiseMax(vec_all_nan)); verify_all_zero(vec_zero.template cwiseMax(kZero)); verify_all_zero(vec_zero.template cwiseMax(vec_zero)); @@ -340,12 +345,12 @@ void test_minmax_nan_propagation_templ() { // max(nan, 0) = 0 // max(0, nan) = 0 // max(0, 0) = 0 - verify_all_nan(vec_nan.template cwiseMax(kNan)); - verify_all_nan(vec_nan.template cwiseMax(vec_nan)); - verify_all_zero(vec_nan.template cwiseMax(kZero)); - verify_all_zero(vec_nan.template cwiseMax(vec_zero)); - verify_all_zero(vec_zero.template cwiseMax(kNan)); - verify_all_zero(vec_zero.template cwiseMax(vec_nan)); + verify_all_nan(vec_all_nan.template cwiseMax(kNaN)); + verify_all_nan(vec_all_nan.template cwiseMax(vec_all_nan)); + verify_all_zero(vec_all_nan.template cwiseMax(kZero)); + verify_all_zero(vec_all_nan.template cwiseMax(vec_zero)); + verify_all_zero(vec_zero.template cwiseMax(kNaN)); + verify_all_zero(vec_zero.template cwiseMax(vec_all_nan)); verify_all_zero(vec_zero.template cwiseMax(kZero)); verify_all_zero(vec_zero.template cwiseMax(vec_zero)); @@ -354,12 +359,12 @@ void test_minmax_nan_propagation_templ() { // min(nan, 0) = nan // min(0, nan) = nan // min(0, 0) = 0 - verify_all_nan(vec_nan.template cwiseMin(kNan)); - verify_all_nan(vec_nan.template cwiseMin(vec_nan)); - verify_all_nan(vec_nan.template cwiseMin(kZero)); - verify_all_nan(vec_nan.template cwiseMin(vec_zero)); - verify_all_nan(vec_zero.template cwiseMin(kNan)); - verify_all_nan(vec_zero.template cwiseMin(vec_nan)); + verify_all_nan(vec_all_nan.template cwiseMin(kNaN)); + verify_all_nan(vec_all_nan.template cwiseMin(vec_all_nan)); + verify_all_nan(vec_all_nan.template cwiseMin(kZero)); + verify_all_nan(vec_all_nan.template cwiseMin(vec_zero)); + verify_all_nan(vec_zero.template cwiseMin(kNaN)); + verify_all_nan(vec_zero.template cwiseMin(vec_all_nan)); verify_all_zero(vec_zero.template cwiseMin(kZero)); verify_all_zero(vec_zero.template cwiseMin(vec_zero)); @@ -368,14 +373,49 @@ void test_minmax_nan_propagation_templ() { // min(nan, 0) = 0 // min(0, nan) = 0 // min(0, 0) = 0 - verify_all_nan(vec_nan.template cwiseMin(kNan)); - verify_all_nan(vec_nan.template cwiseMin(vec_nan)); - verify_all_zero(vec_nan.template cwiseMin(kZero)); - verify_all_zero(vec_nan.template cwiseMin(vec_zero)); - verify_all_zero(vec_zero.template cwiseMin(kNan)); - verify_all_zero(vec_zero.template cwiseMin(vec_nan)); + verify_all_nan(vec_all_nan.template cwiseMin(kNaN)); + verify_all_nan(vec_all_nan.template cwiseMin(vec_all_nan)); + verify_all_zero(vec_all_nan.template cwiseMin(kZero)); + verify_all_zero(vec_all_nan.template cwiseMin(vec_zero)); + verify_all_zero(vec_zero.template cwiseMin(kNaN)); + verify_all_zero(vec_zero.template cwiseMin(vec_all_nan)); verify_all_zero(vec_zero.template cwiseMin(kZero)); verify_all_zero(vec_zero.template cwiseMin(vec_zero)); + + // Test min and max reduction + Tensor val; + val = vec_zero.minimum(); + VERIFY_IS_EQUAL(val(), kZero); + val = vec_zero.template minimum(); + VERIFY_IS_EQUAL(val(), kZero); + val = vec_zero.template minimum(); + VERIFY_IS_EQUAL(val(), kZero); + val = vec_zero.maximum(); + VERIFY_IS_EQUAL(val(), kZero); + val = vec_zero.template maximum(); + VERIFY_IS_EQUAL(val(), kZero); + val = vec_zero.template maximum(); + VERIFY_IS_EQUAL(val(), kZero); + + // Test NaN propagation for tensor of all NaNs. + val = vec_all_nan.template minimum(); + VERIFY((numext::isnan)(val())); + val = vec_all_nan.template minimum(); + VERIFY_IS_EQUAL(val(), kInf); + val = vec_all_nan.template maximum(); + VERIFY((numext::isnan)(val())); + val = vec_all_nan.template maximum(); + VERIFY_IS_EQUAL(val(), -kInf); + + // Test NaN propagation for tensor with a single NaN. + val = vec_one_nan.template minimum(); + VERIFY((numext::isnan)(val())); + val = vec_one_nan.template minimum(); + VERIFY_IS_EQUAL(val(), (size == 1 ? kInf : kZero)); + val = vec_one_nan.template maximum(); + VERIFY((numext::isnan)(val())); + val = vec_one_nan.template maximum(); + VERIFY_IS_EQUAL(val(), (size == 1 ? -kInf : kZero)); } }