mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
Implemented the SSE version of the gather and scatter packet primitives.
This commit is contained in:
parent
7f3162f707
commit
8a94cb3edd
@ -223,7 +223,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
|
||||
{ return ploadu<Packet>(from); }
|
||||
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, int /*stride*/)
|
||||
{ (*to) = from; }
|
||||
{ pstore(to, from); }
|
||||
|
||||
/** \internal tries to do cache prefetching of \a addr */
|
||||
template<typename Scalar> inline void prefetch(const Scalar* addr)
|
||||
|
@ -111,6 +111,24 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); }
|
||||
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, int stride)
|
||||
{
|
||||
return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
|
||||
std::imag(from[0*stride]), std::real(from[0*stride])));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, int stride)
|
||||
{
|
||||
/* for (int i = 0; i < 2; i+=2) {
|
||||
to[stride*i] = std::complex<float>(from.v[i], from.v[i+1]);
|
||||
}*/
|
||||
to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
|
||||
to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
|
||||
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
|
||||
|
@ -339,6 +339,39 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castps_pd(from)); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), _mm_castsi128_pd(from)); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, int stride)
|
||||
{
|
||||
return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, int stride)
|
||||
{
|
||||
return _mm_set_pd(from[1*stride], from[0*stride]);
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, int stride)
|
||||
{
|
||||
return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, int stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtss_f32(from);
|
||||
to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
|
||||
to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
|
||||
to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, int stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtsd_f64(from);
|
||||
to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
|
||||
}
|
||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, int stride)
|
||||
{
|
||||
to[stride*0] = _mm_cvtsi128_si32(from);
|
||||
to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
|
||||
to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
|
||||
to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
|
||||
}
|
||||
|
||||
// some compilers might be tempted to perform multiple moves instead of using a vector path.
|
||||
template<> EIGEN_STRONG_INLINE void pstore1<Packet4f>(float* to, const float& a)
|
||||
{
|
||||
|
@ -411,6 +411,7 @@ void test_packetmath()
|
||||
|
||||
CALL_SUBTEST_1( packetmath_scatter_gather<float>() );
|
||||
CALL_SUBTEST_2( packetmath_scatter_gather<double>() );
|
||||
CALL_SUBTEST_3( packetmath_scatter_gather<int>() );
|
||||
CALL_SUBTEST_3( packetmath_scatter_gather<std::complex<float> >() );
|
||||
CALL_SUBTEST_3( packetmath_scatter_gather<std::complex<double> >() );
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user