mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-23 06:43:13 +08:00
add pbroadcast2/4 generic intrinsics
This commit is contained in:
parent
c8c81c1e74
commit
b286a1e75c
@ -173,6 +173,40 @@ pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
|
|||||||
template<typename Packet> EIGEN_DEVICE_FUNC
inline Packet pload1(const typename unpacket_traits<Packet>::type *a)
{
  // Read the single scalar at `a` and hand it to pset1 to build the packet.
  return pset1<Packet>(a[0]);
}
|
||||||
|
|
||||||
|
/** \internal equivalent to
|
||||||
|
* \code
|
||||||
|
* a0 = pload1(a+0);
|
||||||
|
* a1 = pload1(a+1);
|
||||||
|
* a2 = pload1(a+2);
|
||||||
|
* a3 = pload1(a+3);
|
||||||
|
* \endcode
|
||||||
|
* \sa pset1, pload1, ploaddup, pbroadcast2
|
||||||
|
*/
|
||||||
|
template<typename Packet> EIGEN_DEVICE_FUNC
|
||||||
|
inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
|
||||||
|
Packet& a0, Packet& a1, Packet& a2, Packet& a3)
|
||||||
|
{
|
||||||
|
a0 = pload1<Packet>(a+0);
|
||||||
|
a1 = pload1<Packet>(a+1);
|
||||||
|
a2 = pload1<Packet>(a+2);
|
||||||
|
a3 = pload1<Packet>(a+3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal equivalent to
|
||||||
|
* \code
|
||||||
|
* a0 = pload1(a+0);
|
||||||
|
* a1 = pload1(a+1);
|
||||||
|
* \endcode
|
||||||
|
* \sa pset1, pload1, ploaddup, pbroadcast4
|
||||||
|
*/
|
||||||
|
template<typename Packet> EIGEN_DEVICE_FUNC
|
||||||
|
inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
|
||||||
|
Packet& a0, Packet& a1)
|
||||||
|
{
|
||||||
|
a0 = pload1<Packet>(a+0);
|
||||||
|
a1 = pload1<Packet>(a+1);
|
||||||
|
}
|
||||||
|
|
||||||
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
|
||||||
template<typename Scalar> inline typename packet_traits<Scalar>::type
|
template<typename Scalar> inline typename packet_traits<Scalar>::type
|
||||||
plset(const Scalar& a) { return a; }
|
plset(const Scalar& a) { return a; }
|
||||||
|
@ -391,6 +391,38 @@ template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With AVX, the default implementations based on pload1 are faster, so the
// specializations below are only compiled when AVX is not enabled.
|
||||||
|
#ifndef __AVX__
|
||||||
|
// Packet4f specialization of pbroadcast4: performs a single packet load of the
// four floats at `a`, then derives each output from that loaded packet with
// vec4f_swizzle1 using the repeated lane index 0, 1, 2 and 3 respectively.
// NOTE(review): pload is presumably the aligned packet load, in which case `a`
// must be suitably aligned and all four floats readable -- confirm against callers.
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet4f>(const float *a,
                      Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
{
  // Load once; every output below is a swizzle of this packet.
  const Packet4f quad = pload<Packet4f>(a);
  a0 = vec4f_swizzle1(quad, 0,0,0,0);
  a1 = vec4f_swizzle1(quad, 1,1,1,1);
  a2 = vec4f_swizzle1(quad, 2,2,2,2);
  a3 = vec4f_swizzle1(quad, 3,3,3,3);
}
|
||||||
|
// Packet2d specialization of pbroadcast4 for the four doubles at `a`.
//  - With EIGEN_VECTORIZE_SSE3: one _mm_loaddup_pd per output, reading a[0]..a[3].
//  - Otherwise: two packet loads (at a and a+2), each followed by two
//    vec2d_swizzle1 calls selecting lane 0 and lane 1 of the loaded pair.
// NOTE(review): pload is presumably the aligned packet load, so the non-SSE3
// path likely requires `a` to be suitably aligned -- confirm against callers.
template<> EIGEN_STRONG_INLINE void
pbroadcast4<Packet2d>(const double *a,
                      Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
{
#ifdef EIGEN_VECTORIZE_SSE3
  a0 = _mm_loaddup_pd(&a[0]);
  a1 = _mm_loaddup_pd(&a[1]);
  a2 = _mm_loaddup_pd(&a[2]);
  a3 = _mm_loaddup_pd(&a[3]);
#else
  // First pair: a[0], a[1].
  const Packet2d lowPair = pload<Packet2d>(a);
  a0 = vec2d_swizzle1(lowPair, 0,0);
  a1 = vec2d_swizzle1(lowPair, 1,1);
  // Second pair: a[2], a[3].
  const Packet2d highPair = pload<Packet2d>(a+2);
  a2 = vec2d_swizzle1(highPair, 0,0);
  a3 = vec2d_swizzle1(highPair, 1,1);
#endif
}
|
||||||
|
#endif
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
|
EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs)
|
||||||
{
|
{
|
||||||
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
|
vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user