mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Add reciprocal packet op and fast specializations for float with SSE, AVX, and AVX512.
This commit is contained in:
parent
4b0926f99b
commit
ea2c02060c
@ -65,6 +65,7 @@ struct default_packet_traits
|
|||||||
HasCmp = 0,
|
HasCmp = 0,
|
||||||
|
|
||||||
HasDiv = 0,
|
HasDiv = 0,
|
||||||
|
HasReciprocal = 0,
|
||||||
HasSqrt = 0,
|
HasSqrt = 0,
|
||||||
HasRsqrt = 0,
|
HasRsqrt = 0,
|
||||||
HasExp = 0,
|
HasExp = 0,
|
||||||
@ -816,13 +817,6 @@ Packet plog2(const Packet& a) {
|
|||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet psqrt(const Packet& a) { return numext::sqrt(a); }
|
Packet psqrt(const Packet& a) { return numext::sqrt(a); }
|
||||||
|
|
||||||
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
|
|
||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
||||||
Packet prsqrt(const Packet& a) {
|
|
||||||
typedef typename internal::unpacket_traits<Packet>::type Scalar;
|
|
||||||
return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \internal \returns the rounded value of \a a (coeff-wise) */
|
/** \internal \returns the rounded value of \a a (coeff-wise) */
|
||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet pround(const Packet& a) { using numext::round; return round(a); }
|
Packet pround(const Packet& a) { using numext::round; return round(a); }
|
||||||
@ -1035,6 +1029,19 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
|
|||||||
return ifPacket.select[0] ? thenPacket : elsePacket;
|
return ifPacket.select[0] ? thenPacket : elsePacket;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal \returns 1 / a (coeff-wise) */
|
||||||
|
template <typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
|
||||||
|
using Scalar = typename unpacket_traits<Packet>::type;
|
||||||
|
return pdiv(pset1<Packet>(Scalar(1)), a);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
|
||||||
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
|
Packet prsqrt(const Packet& a) {
|
||||||
|
return preciprocal<Packet>(psqrt(a));
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -17,6 +17,35 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
/** \internal Fast reciprocal using Newton-Raphson's method.
|
||||||
|
We assume that the starting guess provided in approx_a_recip has at least
|
||||||
|
half the leading mantissa bits in the correct result, such that a single
|
||||||
|
Newton-Raphson step is sufficient to get within 1-2 ulps of the currect result.
|
||||||
|
*/
|
||||||
|
template <typename Packet, int Steps>
|
||||||
|
struct generic_reciprocal_newton_step {
|
||||||
|
static_assert(Steps > 0, "Steps must be at least 1.");
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
|
||||||
|
run(const Packet& a, const Packet& approx_a_recip) {
|
||||||
|
using Scalar = typename unpacket_traits<Packet>::type;
|
||||||
|
const Packet two = pset1<Packet>(Scalar(2));
|
||||||
|
const Packet neg_a = pnegate(a);
|
||||||
|
// Refine the approximation using one Newton-Raphson step:
|
||||||
|
// x_{i} = x_{i-1} * (2 - a * x_{i-1})
|
||||||
|
const Packet x =
|
||||||
|
generic_reciprocal_newton_step<Packet,Steps - 1>::run(a, approx_a_recip);
|
||||||
|
return pmul(x, pmadd(neg_a, x, two));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename Packet>
|
||||||
|
struct generic_reciprocal_newton_step<Packet, 0> {
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
|
||||||
|
run(const Packet& /*unused*/, const Packet& approx_a_recip) {
|
||||||
|
return approx_a_recip;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
||||||
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
|
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
|
||||||
is accurate up to a couple of ulps in the (approximate) range [-8, 8],
|
is accurate up to a couple of ulps in the (approximate) range [-8, 8],
|
||||||
|
@ -166,19 +166,12 @@ Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
|||||||
return pselect<Packet8f>(not_normal_finite_mask, y_approx, y_newton);
|
return pselect<Packet8f>(not_normal_finite_mask, y_approx, y_newton);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
template<> EIGEN_STRONG_INLINE Packet8f preciprocal<Packet8f>(const Packet8f& a) {
|
||||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
return generic_reciprocal_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rcp_ps(a));
|
||||||
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
|
|
||||||
EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
|
|
||||||
return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(_x));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
||||||
Packet4d prsqrt<Packet4d>(const Packet4d& _x) {
|
|
||||||
EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
|
|
||||||
return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(_x));
|
|
||||||
}
|
|
||||||
|
|
||||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
|
||||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
|
||||||
@ -190,6 +183,7 @@ F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
|
|||||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
|
||||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
|
||||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
|
||||||
|
F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal)
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
|
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
|
||||||
@ -214,6 +208,7 @@ BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
|
|||||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
|
||||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
|
||||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
|
||||||
|
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal)
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
|
EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
|
||||||
|
@ -78,6 +78,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
|
|
||||||
HasCmp = 1,
|
HasCmp = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
|
HasReciprocal = EIGEN_FAST_MATH,
|
||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
|
@ -253,13 +253,6 @@ prsqrt<Packet16f>(const Packet16f& _x) {
|
|||||||
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
|
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
|
||||||
return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
|
return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
|
|
||||||
template <>
|
|
||||||
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
|
||||||
EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
|
|
||||||
return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
|
||||||
@ -304,12 +297,17 @@ prsqrt<Packet8d>(const Packet8d& _x) {
|
|||||||
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
|
// return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
|
||||||
return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
|
return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f& a) {
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX512ER
|
||||||
|
return _mm512_rcp28_ps(a));
|
||||||
#else
|
#else
|
||||||
template <>
|
return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
|
||||||
EIGEN_STRONG_INLINE Packet8d prsqrt<Packet8d>(const Packet8d& x) {
|
#endif
|
||||||
EIGEN_DECLARE_CONST_Packet8d(one, 1.0f);
|
|
||||||
return _mm512_div_pd(p8d_one, _mm512_sqrt_pd(x));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
|
||||||
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
|
@ -120,6 +120,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = EIGEN_FAST_MATH,
|
HasSqrt = EIGEN_FAST_MATH,
|
||||||
HasRsqrt = EIGEN_FAST_MATH,
|
HasRsqrt = EIGEN_FAST_MATH,
|
||||||
|
HasReciprocal = EIGEN_FAST_MATH,
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
HasErf = EIGEN_FAST_MATH,
|
HasErf = EIGEN_FAST_MATH,
|
||||||
#endif
|
#endif
|
||||||
|
@ -148,21 +148,13 @@ Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
|
|||||||
return pselect<Packet4f>(not_normal_finite_mask, y_approx, y_newton);
|
return pselect<Packet4f>(not_normal_finite_mask, y_approx, y_newton);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
|
||||||
Packet4f prsqrt<Packet4f>(const Packet4f& x) {
|
|
||||||
// Unfortunately we can't use the much faster mm_rsqrt_ps since it only provides an approximation.
|
|
||||||
return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_STRONG_INLINE Packet4f preciprocal<Packet4f>(const Packet4f& a) {
|
||||||
Packet2d prsqrt<Packet2d>(const Packet2d& x) {
|
return generic_reciprocal_newton_step<Packet4f, /*Steps=*/1>::run(a, _mm_rcp_ps(a));
|
||||||
return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Hyperbolic Tangent function.
|
// Hyperbolic Tangent function.
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
@ -136,6 +136,7 @@ struct packet_traits<float> : default_packet_traits {
|
|||||||
|
|
||||||
HasCmp = 1,
|
HasCmp = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
|
HasReciprocal = EIGEN_FAST_MATH,
|
||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
|
@ -723,13 +723,18 @@ struct scalar_inverse_op {
|
|||||||
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
|
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
|
||||||
template<typename Packet>
|
template<typename Packet>
|
||||||
EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
|
EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
|
||||||
{ return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
|
{ return internal::preciprocal(a); }
|
||||||
};
|
};
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct functor_traits<scalar_inverse_op<Scalar> > {
|
struct functor_traits<scalar_inverse_op<Scalar> > {
|
||||||
enum {
|
enum {
|
||||||
PacketAccess = packet_traits<Scalar>::HasDiv,
|
PacketAccess = packet_traits<Scalar>::HasDiv,
|
||||||
Cost = scalar_div_cost<Scalar, PacketAccess>::value
|
// If packet_traits<Scalar>::HasReciprocal then the Estimated cost is that
|
||||||
|
// of computing an approximation plus a single Newton-Raphson step, which
|
||||||
|
// consists of 1 pmul + 1 pmadd.
|
||||||
|
Cost = (packet_traits<Scalar>::HasReciprocal
|
||||||
|
? 4 * NumTraits<Scalar>::MulCost
|
||||||
|
: scalar_div_cost<Scalar, PacketAccess>::value)
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -58,10 +58,10 @@ struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType
|
|||||||
|
|
||||||
const float* data = matrix.data();
|
const float* data = matrix.data();
|
||||||
const Index stride = matrix.innerStride();
|
const Index stride = matrix.innerStride();
|
||||||
Packet4f L1_ = ploadt<Packet4f,MatrixAlignment>(data);
|
Packet4f L1 = ploadt<Packet4f,MatrixAlignment>(data);
|
||||||
Packet4f L2_ = ploadt<Packet4f,MatrixAlignment>(data + stride*4);
|
Packet4f L2 = ploadt<Packet4f,MatrixAlignment>(data + stride*4);
|
||||||
Packet4f L3_ = ploadt<Packet4f,MatrixAlignment>(data + stride*8);
|
Packet4f L3 = ploadt<Packet4f,MatrixAlignment>(data + stride*8);
|
||||||
Packet4f L4_ = ploadt<Packet4f,MatrixAlignment>(data + stride*12);
|
Packet4f L4 = ploadt<Packet4f,MatrixAlignment>(data + stride*12);
|
||||||
|
|
||||||
// Four 2x2 sub-matrices of the input matrix
|
// Four 2x2 sub-matrices of the input matrix
|
||||||
// input = [[A, B],
|
// input = [[A, B],
|
||||||
@ -70,17 +70,17 @@ struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType
|
|||||||
|
|
||||||
if (!StorageOrdersMatch)
|
if (!StorageOrdersMatch)
|
||||||
{
|
{
|
||||||
A = vec4f_unpacklo(L1_, L2_);
|
A = vec4f_unpacklo(L1, L2);
|
||||||
B = vec4f_unpacklo(L3_, L4_);
|
B = vec4f_unpacklo(L3, L4);
|
||||||
C = vec4f_unpackhi(L1_, L2_);
|
C = vec4f_unpackhi(L1, L2);
|
||||||
D = vec4f_unpackhi(L3_, L4_);
|
D = vec4f_unpackhi(L3, L4);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
A = vec4f_movelh(L1_, L2_);
|
A = vec4f_movelh(L1, L2);
|
||||||
B = vec4f_movehl(L2_, L1_);
|
B = vec4f_movehl(L2, L1);
|
||||||
C = vec4f_movelh(L3_, L4_);
|
C = vec4f_movelh(L3, L4);
|
||||||
D = vec4f_movehl(L4_, L3_);
|
D = vec4f_movehl(L4, L3);
|
||||||
}
|
}
|
||||||
|
|
||||||
Packet4f AB, DC;
|
Packet4f AB, DC;
|
||||||
@ -120,7 +120,7 @@ struct compute_inverse_size4<Architecture::Target, float, MatrixType, ResultType
|
|||||||
Packet4f det = vec4f_duplane(psub(padd(d1, d2), d), 0);
|
Packet4f det = vec4f_duplane(psub(padd(d1, d2), d), 0);
|
||||||
|
|
||||||
// reciprocal of the determinant of the input matrix, rd = 1/det
|
// reciprocal of the determinant of the input matrix, rd = 1/det
|
||||||
Packet4f rd = pdiv(pset1<Packet4f>(1.0f), det);
|
Packet4f rd = preciprocal(det);
|
||||||
|
|
||||||
// Four sub-matrices of the inverse
|
// Four sub-matrices of the inverse
|
||||||
Packet4f iA, iB, iC, iD;
|
Packet4f iA, iB, iC, iD;
|
||||||
|
@ -28,6 +28,10 @@ inline T REF_DIV(const T& a, const T& b) {
|
|||||||
return a / b;
|
return a / b;
|
||||||
}
|
}
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
inline T REF_RECIPROCAL(const T& a) {
|
||||||
|
return T(1) / a;
|
||||||
|
}
|
||||||
|
template <typename T>
|
||||||
inline T REF_ABS_DIFF(const T& a, const T& b) {
|
inline T REF_ABS_DIFF(const T& a, const T& b) {
|
||||||
return a > b ? a - b : b - a;
|
return a > b ? a - b : b - a;
|
||||||
}
|
}
|
||||||
@ -464,9 +468,11 @@ void packetmath() {
|
|||||||
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
|
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
|
||||||
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
|
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
|
||||||
|
|
||||||
if (PacketTraits::HasNegate) CHECK_CWISE1(internal::negate, internal::pnegate);
|
CHECK_CWISE1_IF(PacketTraits::HasNegate, internal::negate, internal::pnegate);
|
||||||
|
CHECK_CWISE1_IF(PacketTraits::HasReciprocal, REF_RECIPROCAL, internal::preciprocal);
|
||||||
CHECK_CWISE1(numext::conj, internal::pconj);
|
CHECK_CWISE1(numext::conj, internal::pconj);
|
||||||
|
|
||||||
|
|
||||||
for (int offset = 0; offset < 3; ++offset) {
|
for (int offset = 0; offset < 3; ++offset) {
|
||||||
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[offset];
|
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[offset];
|
||||||
internal::pstore(data2, internal::pset1<Packet>(data1[offset]));
|
internal::pstore(data2, internal::pset1<Packet>(data1[offset]));
|
||||||
|
@ -19,9 +19,7 @@ template<typename MatrixType> void inverse_permutation_4x4()
|
|||||||
{
|
{
|
||||||
MatrixType m = PermutationMatrix<4>(indices);
|
MatrixType m = PermutationMatrix<4>(indices);
|
||||||
MatrixType inv = m.inverse();
|
MatrixType inv = m.inverse();
|
||||||
double error = double( (m*inv-MatrixType::Identity()).norm() / NumTraits<Scalar>::epsilon() );
|
VERIFY_IS_APPROX(m*inv, MatrixType::Identity());
|
||||||
EIGEN_DEBUG_VAR(error)
|
|
||||||
VERIFY(error == 0.0);
|
|
||||||
std::next_permutation(indices.data(),indices.data()+4);
|
std::next_permutation(indices.data(),indices.data()+4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -601,13 +601,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two(
|
|||||||
ScalarType(6.79019408009981274425e-9)
|
ScalarType(6.79019408009981274425e-9)
|
||||||
};
|
};
|
||||||
const T eight = pset1<T>(ScalarType(8.0));
|
const T eight = pset1<T>(ScalarType(8.0));
|
||||||
const T one = pset1<T>(ScalarType(1));
|
|
||||||
const T neg_two = pset1<T>(ScalarType(-2));
|
const T neg_two = pset1<T>(ScalarType(-2));
|
||||||
T x, x0, x1, z;
|
T x, x0, x1, z;
|
||||||
|
|
||||||
x = psqrt(pmul(neg_two, plog(b)));
|
x = psqrt(pmul(neg_two, plog(b)));
|
||||||
x0 = psub(x, pdiv(plog(x), x));
|
x0 = psub(x, pdiv(plog(x), x));
|
||||||
z = pdiv(one, x);
|
z = preciprocal(x);
|
||||||
x1 = pmul(
|
x1 = pmul(
|
||||||
z, pselect(
|
z, pselect(
|
||||||
pcmp_lt(x, eight),
|
pcmp_lt(x, eight),
|
||||||
|
@ -130,7 +130,7 @@ static void test_3d()
|
|||||||
Tensor<float, 3, RowMajor> mat4(2,3,7);
|
Tensor<float, 3, RowMajor> mat4(2,3,7);
|
||||||
mat4 = mat2 * 3.14f;
|
mat4 = mat2 * 3.14f;
|
||||||
Tensor<float, 3> mat5(2,3,7);
|
Tensor<float, 3> mat5(2,3,7);
|
||||||
mat5 = mat1.inverse().log();
|
mat5 = (mat1 + mat1.constant(1)).inverse().log();
|
||||||
Tensor<float, 3, RowMajor> mat6(2,3,7);
|
Tensor<float, 3, RowMajor> mat6(2,3,7);
|
||||||
mat6 = mat2.pow(0.5f) * 3.14f;
|
mat6 = mat2.pow(0.5f) * 3.14f;
|
||||||
Tensor<float, 3> mat7(2,3,7);
|
Tensor<float, 3> mat7(2,3,7);
|
||||||
@ -150,7 +150,7 @@ static void test_3d()
|
|||||||
for (int k = 0; k < 7; ++k) {
|
for (int k = 0; k < 7; ++k) {
|
||||||
VERIFY_IS_APPROX(mat3(i,j,k), val + val);
|
VERIFY_IS_APPROX(mat3(i,j,k), val + val);
|
||||||
VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f);
|
VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f);
|
||||||
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/val));
|
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/(val + 1)));
|
||||||
VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f);
|
VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f);
|
||||||
VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f)));
|
VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f)));
|
||||||
VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f);
|
VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user