Fix packed load/store for PowerPC's VSX

The vec_vsx_ld/vec_vsx_st builtins were wrongly used for aligned load/store. In fact, they perform unaligned memory access and, even when the address is 16-byte aligned, they are much slower (at least 2x) than their aligned counterparts.

For double/Packet2d, vec_xl/vec_xst should be preferred over vec_ld/vec_st, although the latter work when cast to float/Packet4f.

Silence some weird warnings that are thrown by some GCC versions. Such warnings are not thrown by Clang.
This commit is contained in:
João P. L. de Carvalho 2019-08-09 16:02:55 -06:00
parent 4d29aa0294
commit 787f6ef025

View File

@ -240,42 +240,38 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
// Need to define them first or we get specialization after instantiation errors
// Aligned load of one Packet4f (four floats) starting at `from`.
// vec_ld is used on every target: vec_vsx_ld performs an unaligned access and
// is considerably slower even when the address happens to be 16-byte aligned,
// so it must not back the aligned load path.
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
  // some versions of GCC throw "unused-but-set-parameter".
  // ignoring these warnings for now.
  EIGEN_UNUSED_VARIABLE(from);
  EIGEN_DEBUG_ALIGNED_LOAD
  return vec_ld(0, from);
}
// Aligned load of one Packet4i (four ints) starting at `from`.
// vec_ld is used on every target: vec_vsx_ld performs an unaligned access and
// is considerably slower even when the address happens to be 16-byte aligned,
// so it must not back the aligned load path.
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
{
  // some versions of GCC throw "unused-but-set-parameter".
  // ignoring these warnings for now.
  EIGEN_UNUSED_VARIABLE(from);
  EIGEN_DEBUG_ALIGNED_LOAD
  return vec_ld(0, from);
}
// Aligned store of one Packet4f (four floats) to `to`.
// vec_st is used on every target: vec_vsx_st performs an unaligned access and
// is considerably slower even when the address happens to be 16-byte aligned,
// so it must not back the aligned store path.
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
{
  // some versions of GCC throw "unused-but-set-parameter" (float *to).
  // ignoring these warnings for now.
  EIGEN_UNUSED_VARIABLE(to);
  EIGEN_DEBUG_ALIGNED_STORE
  vec_st(from, 0, to);
}
// Aligned store of one Packet4i (four ints) to `to`.
// vec_st is used on every target: vec_vsx_st performs an unaligned access and
// is considerably slower even when the address happens to be 16-byte aligned,
// so it must not back the aligned store path.
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
{
  // some versions of GCC throw "unused-but-set-parameter" (int *to).
  // ignoring these warnings for now.
  EIGEN_UNUSED_VARIABLE(to);
  EIGEN_DEBUG_ALIGNED_STORE
  vec_st(from, 0, to);
}
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
@ -940,21 +936,13 @@ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
// Aligned load of one Packet2d (two doubles) starting at `from`.
// A single vec_xl load is issued; the former vec_vsx_ld/vec_ld branches
// returned first and left this load unreachable.
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return vec_xl(0, const_cast<double *>(from)); // cast needed by Clang
}
// Aligned store of one Packet2d (two doubles) to `to`.
// A single vec_xst store is issued; the former vec_vsx_st/vec_st branches
// wrote the same packet a second time before it.
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  vec_xst(from, 0, to);
}
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {