mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-03 10:14:04 +08:00
Add a pinsertlast function replacing the last entry of a packet by a scalar.
(useful to vectorize LinSpaced)
This commit is contained in:
parent
2634f9386c
commit
13fc18d3a2
@ -558,6 +558,19 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
|
||||
return ifPacket.select[0] ? thenPacket : elsePacket;
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with last coefficients replaced by the scalar b */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC Packet pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
|
||||
{
|
||||
// Default implementation based on pblend.
|
||||
// It must be specialized for higher performance.
|
||||
Selector<unpacket_traits<Packet>::size> mask;
|
||||
// This for loop should be optimized away by the compiler.
|
||||
for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
|
||||
mask.select[i] = false;
|
||||
mask.select[unpacket_traits<Packet>::size-1] = true;
|
||||
return pblend(mask, pset1<Packet>(b), a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -604,6 +604,16 @@ template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, cons
|
||||
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with its last (8th) coefficient replaced by the scalar \a b.
  *
  * Declared \c inline: an explicit specialization of a function template is not
  * implicitly inline, so defining it in a header without \c inline yields
  * multiple-definition link errors as soon as two translation units include it.
  */
template<> EIGEN_DEVICE_FUNC inline Packet8f pinsertlast(const Packet8f& a, float b)
{
  // Immediate bit 7 set: lane 7 is taken from the broadcast of b, all other lanes from a.
  return _mm256_blend_ps(a,pset1<Packet8f>(b),(1<<7));
}
|
||||
|
||||
/** \internal \returns \a a with its last (4th) coefficient replaced by the scalar \a b.
  *
  * Declared \c inline: an explicit specialization of a function template is not
  * implicitly inline, so defining it in a header without \c inline yields
  * multiple-definition link errors as soon as two translation units include it.
  */
template<> EIGEN_DEVICE_FUNC inline Packet4d pinsertlast(const Packet4d& a, double b)
{
  // Immediate bit 3 set: lane 3 is taken from the broadcast of b, all other lanes from a.
  return _mm256_blend_pd(a,pset1<Packet4d>(b),(1<<3));
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -818,6 +818,26 @@ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, cons
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \internal \returns \a a with its last (4th) coefficient replaced by the scalar \a b.
  *
  * Declared \c inline: an explicit specialization of a function template is not
  * implicitly inline, so defining it in a header without \c inline yields
  * multiple-definition link errors as soon as two translation units include it.
  */
template<> EIGEN_DEVICE_FUNC inline Packet4f pinsertlast(const Packet4f& a, float b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  // Immediate bit 3 set: lane 3 is taken from the broadcast of b, all other lanes from a.
  return _mm_blend_ps(a,pset1<Packet4f>(b),(1<<3));
#else
  // No blend instruction before SSE4.1: emulate it with a bit mask that is
  // all-ones in the last 32-bit lane and zero elsewhere.
  const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x0,0x0,0x0,0xFFFFFFFF));
  // Use the single-precision (_ps) bitwise intrinsics: the operands are float
  // packets, whereas the _pd variants take double-precision vectors and do not
  // accept them.
  return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, pset1<Packet4f>(b)));
#endif
}
|
||||
|
||||
/** \internal \returns \a a with its last (2nd) coefficient replaced by the scalar \a b.
  *
  * Declared \c inline: an explicit specialization of a function template is not
  * implicitly inline, so defining it in a header without \c inline yields
  * multiple-definition link errors as soon as two translation units include it.
  */
template<> EIGEN_DEVICE_FUNC inline Packet2d pinsertlast(const Packet2d& a, double b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  // Immediate bit 1 set: lane 1 is taken from the broadcast of b, lane 0 from a.
  return _mm_blend_pd(a,pset1<Packet2d>(b),(1<<1));
#else
  // No blend instruction before SSE4.1: emulate it with a bit mask whose upper
  // 64 bits (two 32-bit words) are all-ones and whose lower 64 bits are zero.
  const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x0,0xFFFFFFFF,0xFFFFFFFF));
  return _mm_or_pd(_mm_andnot_pd(mask, a), _mm_and_pd(mask, pset1<Packet2d>(b)));
#endif
}
|
||||
|
||||
// Scalar path for pmadd with FMA to ensure consistency with vectorized path.
|
||||
#ifdef __FMA__
|
||||
template<> EIGEN_STRONG_INLINE float pmadd(const float& a, const float& b, const float& c) {
|
||||
|
@ -289,6 +289,16 @@ template<typename Scalar> void packetmath()
|
||||
VERIFY(isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue));
|
||||
}
|
||||
}
|
||||
|
||||
if (PacketTraits::HasBlend) {
|
||||
// pinsertlast
|
||||
for (int i=0; i<PacketSize; ++i)
|
||||
ref[i] = data1[i];
|
||||
Scalar s = internal::random<Scalar>();
|
||||
ref[PacketSize-1] = s;
|
||||
internal::pstore(data2, internal::pinsertlast(internal::pload<Packet>(data1),s));
|
||||
VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertlast");
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Scalar> void packetmath_real()
|
||||
|
Loading…
x
Reference in New Issue
Block a user