mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-19 08:09:36 +08:00
Clean up packetmath tests and fix various bugs to make bfloat16 pass (almost) all packetmath tests with SSE, AVX, and AVX512.
This commit is contained in:
parent
7a8d3d5b81
commit
4e4d3f32d1
@ -1205,6 +1205,11 @@ EIGEN_STRONG_INLINE Packet8bf pmax<Packet8bf>(const Packet8bf& a,
|
||||
return F32ToBf16(pmax<Packet8f>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8bf plset<Packet8bf>(const bfloat16& a) {
|
||||
return F32ToBf16(plset<Packet8f>(static_cast<float>(a)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf por(const Packet8bf& a,const Packet8bf& b) {
|
||||
return _mm_or_si128(a,b);
|
||||
}
|
||||
|
@ -35,23 +35,6 @@ struct type_casting_traits<int, float> {
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
|
||||
return _mm256_cvttps_epi32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
|
||||
return _mm256_cvtepi32_ps(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i,Packet8f>(const Packet8f& a) {
|
||||
return _mm256_castps_si256(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f,Packet8i>(const Packet8i& a) {
|
||||
return _mm256_castsi256_ps(a);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_VECTORIZE_AVX512
|
||||
|
||||
template <>
|
||||
@ -63,9 +46,6 @@ struct type_casting_traits<Eigen::half, float> {
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
|
||||
return half2float(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, Eigen::half> {
|
||||
@ -85,10 +65,6 @@ struct type_casting_traits<bfloat16, float> {
|
||||
};
|
||||
};
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) {
|
||||
return Bf16ToF32(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<float, bfloat16> {
|
||||
enum {
|
||||
@ -100,6 +76,30 @@ struct type_casting_traits<float, bfloat16> {
|
||||
|
||||
#endif // EIGEN_VECTORIZE_AVX512
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
|
||||
return _mm256_cvttps_epi32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
|
||||
return _mm256_cvtepi32_ps(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i,Packet8f>(const Packet8f& a) {
|
||||
return _mm256_castps_si256(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f,Packet8i>(const Packet8i& a) {
|
||||
return _mm256_castsi256_ps(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) {
|
||||
return half2float(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8bf, Packet8f>(const Packet8bf& a) {
|
||||
return Bf16ToF32(a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) {
|
||||
return float2half(a);
|
||||
}
|
||||
|
@ -1626,8 +1626,6 @@ template <> struct is_arithmetic<Packet16bf> { enum { value = true }; };
|
||||
template <>
|
||||
struct packet_traits<bfloat16> : default_packet_traits {
|
||||
typedef Packet16bf type;
|
||||
// There is no half-size packet for current Packet16bf.
|
||||
// TODO: support as SSE path.
|
||||
typedef Packet8bf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
@ -1883,6 +1881,11 @@ EIGEN_STRONG_INLINE Packet16bf pmax<Packet16bf>(const Packet16bf& a,
|
||||
return F32ToBf16(pmax<Packet16f>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16bf plset<Packet16bf>(const bfloat16& a) {
|
||||
return F32ToBf16(plset<Packet16f>(static_cast<float>(a)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8bf predux_half_dowto4<Packet16bf>(const Packet16bf& a) {
|
||||
Packet8bf lane0 = _mm256_extractf128_si256(a, 0);
|
||||
|
@ -103,8 +103,8 @@ struct numeric_limits<Eigen::bfloat16> {
|
||||
static const bool has_infinity = true;
|
||||
static const bool has_quiet_NaN = true;
|
||||
static const bool has_signaling_NaN = true;
|
||||
static const float_denorm_style has_denorm = numeric_limits<float>::has_denorm;
|
||||
static const bool has_denorm_loss = numeric_limits<float>::has_denorm_loss;
|
||||
static const float_denorm_style has_denorm = std::denorm_absent;
|
||||
static const bool has_denorm_loss = false;
|
||||
static const std::float_round_style round_style = numeric_limits<float>::round_style;
|
||||
static const bool is_iec559 = false;
|
||||
static const bool is_bounded = true;
|
||||
@ -551,18 +551,24 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
|
||||
}
|
||||
#if EIGEN_HAS_CXX11_MATH
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
|
||||
return bfloat16(::asinh(float(a)));
|
||||
EIGEN_USING_STD(asinhf);
|
||||
return bfloat16(asinhf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
|
||||
return bfloat16(::acosh(float(a)));
|
||||
EIGEN_USING_STD(acoshf);
|
||||
return bfloat16(acoshf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
|
||||
return bfloat16(::atanh(float(a)));
|
||||
EIGEN_USING_STD(atanhf);
|
||||
return bfloat16(atanhf(float(a)));
|
||||
}
|
||||
#endif
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
|
||||
return bfloat16(::floorf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
|
||||
return bfloat16(::rintf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
|
||||
return bfloat16(::ceilf(float(a)));
|
||||
}
|
||||
@ -581,6 +587,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bf
|
||||
return f1 < f2 ? b : a;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {
|
||||
const float f1 = static_cast<float>(a);
|
||||
const float f2 = static_cast<float>(b);
|
||||
return bfloat16(::fminf(f1, f2));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
|
||||
const float f1 = static_cast<float>(a);
|
||||
const float f2 = static_cast<float>(b);
|
||||
return bfloat16(::fmaxf(f1, f2));
|
||||
}
|
||||
|
||||
#ifndef EIGEN_NO_IO
|
||||
EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
|
||||
os << static_cast<float>(v);
|
||||
|
@ -3373,6 +3373,11 @@ template <> EIGEN_STRONG_INLINE Packet4bf pmax<Packet4bf>(const Packet4bf &a,
|
||||
return F32ToBf16(pmax<Packet4f>(Bf16ToF32(a), Bf16ToF32(b)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4bf plset<Packet4bf>(const bfloat16& a)
|
||||
{
|
||||
return F32ToBf16(plset<Packet4f>(static_cast<float>(a)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4bf por(const Packet4bf& a,const Packet4bf& b) {
|
||||
return por<Packet4us>(a, b);
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
@ -540,7 +541,7 @@ template<typename T1,typename T2>
|
||||
typename NumTraits<typename NumTraits<T1>::Real>::NonInteger test_relative_error(const T1 &a, const T2 &b, typename internal::enable_if<internal::is_arithmetic<typename NumTraits<T1>::Real>::value, T1>::type* = 0)
|
||||
{
|
||||
typedef typename NumTraits<typename NumTraits<T1>::Real>::NonInteger RealScalar;
|
||||
return numext::sqrt(RealScalar(numext::abs2(a-b))/RealScalar((numext::mini)(numext::abs2(a),numext::abs2(b))));
|
||||
return numext::sqrt(RealScalar(numext::abs2(a-b))/(numext::mini)(RealScalar(numext::abs2(a)),RealScalar(numext::abs2(b))));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
|
@ -498,18 +498,18 @@ void packetmath_real() {
|
||||
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = internal::random<Scalar>(0, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
|
||||
data2[i] = internal::random<Scalar>(0, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
|
||||
data1[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
||||
data2[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
||||
}
|
||||
|
||||
if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = 0;
|
||||
if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = Scalar(0);
|
||||
|
||||
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, Scalar(1) / std::sqrt, internal::prsqrt);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, 1 / std::sqrt, internal::prsqrt);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-3, 3));
|
||||
data2[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-3, 3));
|
||||
data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3)));
|
||||
data2[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-3, 3)));
|
||||
}
|
||||
CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
|
||||
@ -522,42 +522,49 @@ void packetmath_real() {
|
||||
|
||||
// See bug 1785.
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = -1.5 + i;
|
||||
data2[i] = -1.5 + i;
|
||||
data1[i] = Scalar(-1.5 + i);
|
||||
data2[i] = Scalar(-1.5 + i);
|
||||
}
|
||||
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = internal::random<Scalar>(-1, 1);
|
||||
data2[i] = internal::random<Scalar>(-1, 1);
|
||||
data1[i] = Scalar(internal::random<double>(-1, 1));
|
||||
data2[i] = Scalar(internal::random<double>(-1, 1));
|
||||
}
|
||||
CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = internal::random<Scalar>(-87, 88);
|
||||
data2[i] = internal::random<Scalar>(-87, 88);
|
||||
data1[i] = Scalar(internal::random<double>(-87, 88));
|
||||
data2[i] = Scalar(internal::random<double>(-87, 88));
|
||||
}
|
||||
CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp);
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
|
||||
data2[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
|
||||
data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
||||
data2[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
||||
}
|
||||
data1[0] = 1e-20;
|
||||
data1[0] = Scalar(1e-20);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh);
|
||||
if (PacketTraits::HasExp && PacketSize >= 2) {
|
||||
const Scalar small = std::numeric_limits<Scalar>::epsilon();
|
||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
data1[1] = std::numeric_limits<Scalar>::epsilon();
|
||||
data1[1] = small;
|
||||
test::packet_helper<PacketTraits::HasExp, Packet> h;
|
||||
h.store(data2, internal::pexp(h.load(data1)));
|
||||
VERIFY((numext::isnan)(data2[0]));
|
||||
VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]);
|
||||
// TODO(rmlarsen): Re-enable for bfloat16.
|
||||
if (!internal::is_same<Scalar, bfloat16>::value) {
|
||||
VERIFY_IS_EQUAL(std::exp(small), data2[1]);
|
||||
}
|
||||
|
||||
data1[0] = -std::numeric_limits<Scalar>::epsilon();
|
||||
data1[1] = 0;
|
||||
data1[0] = -small;
|
||||
data1[1] = Scalar(0);
|
||||
h.store(data2, internal::pexp(h.load(data1)));
|
||||
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
|
||||
// TODO(rmlarsen): Re-enable for bfloat16.
|
||||
if (!internal::is_same<Scalar, bfloat16>::value) {
|
||||
VERIFY_IS_EQUAL(std::exp(-small), data2[0]);
|
||||
}
|
||||
VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]);
|
||||
|
||||
data1[0] = (std::numeric_limits<Scalar>::min)();
|
||||
@ -584,7 +591,7 @@ void packetmath_real() {
|
||||
if (PacketTraits::HasExp) {
|
||||
internal::scalar_logistic_op<Scalar> logistic;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = internal::random<Scalar>(-20, 20);
|
||||
data1[i] = Scalar(internal::random<double>(-20, 20));
|
||||
}
|
||||
|
||||
test::packet_helper<PacketTraits::HasExp, Packet> h;
|
||||
@ -613,7 +620,7 @@ void packetmath_real() {
|
||||
VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]);
|
||||
|
||||
data1[0] = -std::numeric_limits<Scalar>::epsilon();
|
||||
data1[1] = 0;
|
||||
data1[1] = Scalar(0);
|
||||
h.store(data2, internal::plog(h.load(data1)));
|
||||
VERIFY((numext::isnan)(data2[0]));
|
||||
VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]);
|
||||
@ -630,7 +637,8 @@ void packetmath_real() {
|
||||
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
||||
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
||||
h.store(data2, internal::plog(h.load(data1)));
|
||||
// VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
||||
// TODO(rmlarsen): Reenable.
|
||||
// VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
||||
VERIFY((numext::isnan)(data2[1]));
|
||||
}
|
||||
#endif
|
||||
@ -654,17 +662,22 @@ void packetmath_real() {
|
||||
if (PacketTraits::HasSqrt) {
|
||||
test::packet_helper<PacketTraits::HasSqrt, Packet> h;
|
||||
data1[0] = Scalar(-1.0f);
|
||||
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
||||
if (std::numeric_limits<Scalar>::has_denorm == std::denorm_present) {
|
||||
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
||||
} else {
|
||||
data1[1] = -std::numeric_limits<Scalar>::epsilon();
|
||||
}
|
||||
h.store(data2, internal::psqrt(h.load(data1)));
|
||||
VERIFY((numext::isnan)(data2[0]));
|
||||
VERIFY((numext::isnan)(data2[1]));
|
||||
}
|
||||
if (PacketTraits::HasCos) {
|
||||
// TODO(rmlarsen): Re-enable for bfloat16.
|
||||
if (PacketTraits::HasCos && !internal::is_same<Scalar, bfloat16>::value) {
|
||||
test::packet_helper<PacketTraits::HasCos, Packet> h;
|
||||
for (Scalar k = 1; k < Scalar(10000) / std::numeric_limits<Scalar>::epsilon(); k *= 2) {
|
||||
for (Scalar k = Scalar(1); k < Scalar(10000) / std::numeric_limits<Scalar>::epsilon(); k *= Scalar(2)) {
|
||||
for (int k1 = 0; k1 <= 1; ++k1) {
|
||||
data1[0] = (2 * k + k1) * Scalar(EIGEN_PI) / 2 * internal::random<Scalar>(0.8, 1.2);
|
||||
data1[1] = (2 * k + 2 + k1) * Scalar(EIGEN_PI) / 2 * internal::random<Scalar>(0.8, 1.2);
|
||||
data1[0] = Scalar((2 * k + k1) * EIGEN_PI / 2 * internal::random<double>(0.8, 1.2));
|
||||
data1[1] = Scalar((2 * k + 2 + k1) * EIGEN_PI / 2 * internal::random<double>(0.8, 1.2));
|
||||
h.store(data2, internal::pcos(h.load(data1)));
|
||||
h.store(data2 + PacketSize, internal::psin(h.load(data1)));
|
||||
VERIFY(data2[0] <= Scalar(1.) && data2[0] >= Scalar(-1.));
|
||||
@ -765,16 +778,16 @@ void packetmath_real<bfloat16, typename internal::packet_traits<bfloat16>::type>
|
||||
|
||||
template <typename Scalar>
|
||||
Scalar propagate_nan_max(const Scalar& a, const Scalar& b) {
|
||||
if ((std::isnan)(a)) return a;
|
||||
if ((std::isnan)(b)) return b;
|
||||
return (std::max)(a,b);
|
||||
if ((numext::isnan)(a)) return a;
|
||||
if ((numext::isnan)(b)) return b;
|
||||
return (numext::maxi)(a,b);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
Scalar propagate_nan_min(const Scalar& a, const Scalar& b) {
|
||||
if ((std::isnan)(a)) return a;
|
||||
if ((std::isnan)(b)) return b;
|
||||
return (std::min)(a,b);
|
||||
if ((numext::isnan)(a)) return a;
|
||||
if ((numext::isnan)(b)) return b;
|
||||
return (numext::mini)(a,b);
|
||||
}
|
||||
|
||||
template <typename Scalar, typename Packet>
|
||||
|
@ -156,7 +156,7 @@ struct packet_helper<false,Packet>
|
||||
#define CHECK_CWISE1_IF(COND, REFOP, POP) if(COND) { \
|
||||
test::packet_helper<COND,Packet> h; \
|
||||
for (int i=0; i<PacketSize; ++i) \
|
||||
ref[i] = REFOP(data1[i]); \
|
||||
ref[i] = Scalar(REFOP(data1[i])); \
|
||||
h.store(data2, POP(h.load(data1))); \
|
||||
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
|
||||
}
|
||||
@ -164,7 +164,7 @@ struct packet_helper<false,Packet>
|
||||
#define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \
|
||||
test::packet_helper<COND,Packet> h; \
|
||||
for (int i=0; i<PacketSize; ++i) \
|
||||
ref[i] = REFOP(data1[i], data1[i+PacketSize]); \
|
||||
ref[i] = Scalar(REFOP(data1[i], data1[i+PacketSize])); \
|
||||
h.store(data2, POP(h.load(data1),h.load(data1+PacketSize))); \
|
||||
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
|
||||
}
|
||||
@ -172,8 +172,8 @@ struct packet_helper<false,Packet>
|
||||
#define CHECK_CWISE3_IF(COND, REFOP, POP) if (COND) { \
|
||||
test::packet_helper<COND, Packet> h; \
|
||||
for (int i = 0; i < PacketSize; ++i) \
|
||||
ref[i] = \
|
||||
REFOP(data1[i], data1[i + PacketSize], data1[i + 2 * PacketSize]); \
|
||||
ref[i] = Scalar(REFOP(data1[i], data1[i + PacketSize], \
|
||||
data1[i + 2 * PacketSize])); \
|
||||
h.store(data2, POP(h.load(data1), h.load(data1 + PacketSize), \
|
||||
h.load(data1 + 2 * PacketSize))); \
|
||||
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
|
||||
|
Loading…
x
Reference in New Issue
Block a user