Remove unused packet op "palign".

Clean up a compiler warning in c++03 mode in AVX512/Complex.h.
This commit is contained in:
Rasmus Munk Larsen 2020-05-07 17:14:26 -07:00
parent 74ec8e6618
commit 225ab040e0
16 changed files with 1 additions and 915 deletions

View File

@ -685,35 +685,6 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_t
return ploadt<Packet, LoadMode>(from);
}
/** \internal
  * Fallback implementation behind palign(). It is a class template so that
  * each packet type can supply its own partial specialization.
  */
template<int Offset,typename PacketType>
struct palign_impl
{
  // Generic case: the data is taken to be aligned already, so `first` is left as is.
  static inline void run(PacketType& /*first*/, const PacketType& /*second*/) {}
};
/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
* of \a first and \a Offset first elements of \a second.
*
* This function is currently only used to optimize matrix-vector products on unligned matrices.
* It takes 2 packets that represent a contiguous memory array, and returns a packet starting
* at the position \a Offset. For instance, for packets of 4 elements, we have:
* Input:
* - first = {f0,f1,f2,f3}
* - second = {s0,s1,s2,s3}
* Output:
* - if Offset==0 then {f0,f1,f2,f3}
* - if Offset==1 then {f1,f2,f3,s0}
* - if Offset==2 then {f2,f3,s0,s1}
* - if Offset==3 then {f3,s0,s1,s3}
*/
template<int Offset,typename PacketType>
inline void palign(PacketType& first, const PacketType& second)
{
palign_impl<Offset,PacketType>::run(first,second);
}
/***************************************************************************
* Fast complex products (GCC generates a function call which is very slow)
***************************************************************************/

View File

@ -157,16 +157,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const P
Packet2cf(_mm256_extractf128_ps(a.v, 1))));
}
// palign for Packet4cf: forwards to the Packet8f implementation on the wrapped `v`
// members, with the offset doubled (presumably one complex value = two floats).
template<int Offset>
struct palign_impl<Offset,Packet4cf>
{
  static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
  {
    typedef palign_impl<Offset*2,Packet8f> RealImpl;
    // A zero offset leaves `first` untouched.
    if (Offset!=0)
    {
      RealImpl::run(first.v, second.v);
    }
  }
};
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
@ -339,16 +329,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const
Packet1cd(_mm256_extractf128_pd(a.v,1))));
}
// palign for Packet2cd: forwards to the Packet4d implementation on the wrapped `v`
// members, with the offset doubled (presumably one complex value = two doubles).
template<int Offset>
struct palign_impl<Offset,Packet2cd>
{
  static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
  {
    typedef palign_impl<Offset*2,Packet4d> RealImpl;
    // A zero offset leaves `first` untouched.
    if (Offset!=0)
    {
      RealImpl::run(first.v, second.v);
    }
  }
};
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const

View File

@ -691,93 +691,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x)
return _mm256_movemask_ps(x)!=0;
}
// palign for Packet8f. Offsets 1 to 7 each use a dedicated blend/permute sequence;
// any other offset (0 included) leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet8f>
{
  static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
  {
    switch (Offset)
    {
      case 1: {
        first = _mm256_blend_ps(first, second, 1);
        const Packet8f rotated = _mm256_permute_ps(first, _MM_SHUFFLE(0,3,2,1));
        const Packet8f swapped = _mm256_permute2f128_ps(rotated, rotated, 1);
        first = _mm256_blend_ps(rotated, swapped, 0x88);
        break;
      }
      case 2: {
        first = _mm256_blend_ps(first, second, 3);
        const Packet8f rotated = _mm256_permute_ps(first, _MM_SHUFFLE(1,0,3,2));
        const Packet8f swapped = _mm256_permute2f128_ps(rotated, rotated, 1);
        first = _mm256_blend_ps(rotated, swapped, 0xcc);
        break;
      }
      case 3: {
        first = _mm256_blend_ps(first, second, 7);
        const Packet8f rotated = _mm256_permute_ps(first, _MM_SHUFFLE(2,1,0,3));
        const Packet8f swapped = _mm256_permute2f128_ps(rotated, rotated, 1);
        first = _mm256_blend_ps(rotated, swapped, 0xee);
        break;
      }
      case 4: {
        first = _mm256_blend_ps(first, second, 15);
        const Packet8f rotated = _mm256_permute_ps(first, _MM_SHUFFLE(3,2,1,0));
        const Packet8f swapped = _mm256_permute2f128_ps(rotated, rotated, 1);
        first = _mm256_permute_ps(swapped, _MM_SHUFFLE(3,2,1,0));
        break;
      }
      case 5: {
        first = _mm256_blend_ps(first, second, 31);
        first = _mm256_permute2f128_ps(first, first, 1);
        const Packet8f rotated = _mm256_permute_ps(first, _MM_SHUFFLE(0,3,2,1));
        first = _mm256_permute2f128_ps(rotated, rotated, 1);
        first = _mm256_blend_ps(rotated, first, 0x88);
        break;
      }
      case 6: {
        first = _mm256_blend_ps(first, second, 63);
        first = _mm256_permute2f128_ps(first, first, 1);
        const Packet8f rotated = _mm256_permute_ps(first, _MM_SHUFFLE(1,0,3,2));
        first = _mm256_permute2f128_ps(rotated, rotated, 1);
        first = _mm256_blend_ps(rotated, first, 0xcc);
        break;
      }
      case 7: {
        first = _mm256_blend_ps(first, second, 127);
        first = _mm256_permute2f128_ps(first, first, 1);
        const Packet8f rotated = _mm256_permute_ps(first, _MM_SHUFFLE(2,1,0,3));
        first = _mm256_permute2f128_ps(rotated, rotated, 1);
        first = _mm256_blend_ps(rotated, first, 0xee);
        break;
      }
      default:
        break;
    }
  }
};
// palign for Packet4d. Offsets 1 to 3 each use a dedicated blend/permute sequence;
// any other offset (0 included) leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet4d>
{
  static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
  {
    switch (Offset)
    {
      case 1: {
        first = _mm256_blend_pd(first, second, 1);
        const __m256d swapped = _mm256_permute_pd(first, 5);
        first = _mm256_permute2f128_pd(swapped, swapped, 1);
        first = _mm256_blend_pd(swapped, first, 0xA);
        break;
      }
      case 2:
        first = _mm256_blend_pd(first, second, 3);
        first = _mm256_permute2f128_pd(first, first, 1);
        break;
      case 3: {
        first = _mm256_blend_pd(first, second, 7);
        const __m256d swapped = _mm256_permute_pd(first, 5);
        first = _mm256_permute2f128_pd(swapped, swapped, 1);
        first = _mm256_blend_pd(swapped, first, 5);
        break;
      }
      default:
        break;
    }
  }
};
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,8>& kernel) {
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
@ -1078,16 +991,6 @@ template<> EIGEN_STRONG_INLINE Packet8h pinsertlast(const Packet8h& a, Eigen::ha
return _mm_insert_epi16(a,int(b.x),7);
}
// palign for Packet8h: a single _mm_alignr_epi8 with a byte count of 2*Offset.
template<int Offset>
struct palign_impl<Offset,Packet8h>
{
  static EIGEN_STRONG_INLINE void run(Packet8h& first, const Packet8h& second)
  {
    // Nothing to do for a zero offset.
    if (Offset==0) return;
    first = _mm_alignr_epi8(second, first, Offset*2);
  }
};
EIGEN_STRONG_INLINE void
ptranspose(PacketBlock<Packet8h,8>& kernel) {
__m128i a = kernel.packet[0];

View File

@ -153,16 +153,6 @@ EIGEN_STRONG_INLINE Packet4cf predux_half_dowto4<Packet8cf>(const Packet8cf& a)
return Packet4cf(res);
}
// palign for Packet8cf: forwards to the Packet16f implementation on the wrapped `v`
// members, with the offset doubled (presumably one complex value = two floats).
template<int Offset>
struct palign_impl<Offset,Packet8cf>
{
  static EIGEN_STRONG_INLINE void run(Packet8cf& first, const Packet8cf& second)
  {
    typedef palign_impl<Offset*2,Packet16f> RealImpl;
    // A zero offset leaves `first` untouched.
    if (Offset!=0)
    {
      RealImpl::run(first.v, second.v);
    }
  }
};
template<> struct conj_helper<Packet8cf, Packet8cf, false,true>
{
EIGEN_STRONG_INLINE Packet8cf pmadd(const Packet8cf& x, const Packet8cf& y, const Packet8cf& c) const
@ -239,7 +229,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0,
HasSetLinear = 0
};
};
@ -351,16 +341,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const
Packet2cd(_mm512_extractf64x4_pd(a.v,1))));
}
// palign for Packet4cd: forwards to the Packet8d implementation on the wrapped `v`
// members, with the offset doubled (presumably one complex value = two doubles).
template<int Offset>
struct palign_impl<Offset,Packet4cd>
{
  static EIGEN_STRONG_INLINE void run(Packet4cd& first, const Packet4cd& second)
  {
    typedef palign_impl<Offset*2,Packet8d> RealImpl;
    // A zero offset leaves `first` untouched.
    if (Offset!=0)
    {
      RealImpl::run(first.v, second.v);
    }
  }
};
template<> struct conj_helper<Packet4cd, Packet4cd, false,true>
{
EIGEN_STRONG_INLINE Packet4cd pmadd(const Packet4cd& x, const Packet4cd& y, const Packet4cd& c) const

View File

@ -919,52 +919,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x)
return !_mm512_kortestz(tmp,tmp);
}
// palign for Packet16f: both inputs are permuted with per-lane index vectors derived
// from Offset, then merged with a 16-bit mask whose Offset highest bits are set.
template <int Offset>
struct palign_impl<Offset, Packet16f> {
  static EIGEN_STRONG_INLINE void run(Packet16f& first,
                                      const Packet16f& second) {
    // A zero offset leaves `first` untouched.
    if (Offset == 0) return;

    // Index vectors, written from the highest lane down to lane 0.
    const __m512i idx_first = _mm512_set_epi32(
        Offset + 15, Offset + 14, Offset + 13, Offset + 12, Offset + 11,
        Offset + 10, Offset + 9, Offset + 8, Offset + 7, Offset + 6,
        Offset + 5, Offset + 4, Offset + 3, Offset + 2, Offset + 1, Offset);
    const __m512i idx_second = _mm512_set_epi32(
        Offset - 1, Offset - 2, Offset - 3, Offset - 4,
        Offset - 5, Offset - 6, Offset - 7, Offset - 8,
        Offset - 9, Offset - 10, Offset - 11, Offset - 12,
        Offset - 13, Offset - 14, Offset - 15, Offset - 16);

    unsigned short blend_mask = 0xFFFF;
    blend_mask <<= (16 - Offset);

    const Packet16f from_first = _mm512_permutexvar_ps(idx_first, first);
    const Packet16f from_second = _mm512_permutexvar_ps(idx_second, second);
    first = _mm512_mask_blend_ps(blend_mask, from_first, from_second);
  }
};
// palign for Packet8d: both inputs are permuted with per-lane index vectors derived
// from Offset, then merged with an 8-bit mask whose Offset highest bits are set.
template <int Offset>
struct palign_impl<Offset, Packet8d> {
  static EIGEN_STRONG_INLINE void run(Packet8d& first, const Packet8d& second) {
    // A zero offset leaves `first` untouched.
    if (Offset == 0) return;

    // The indices are built from 32-bit pieces; every second piece is 0.
    const __m512i idx_first = _mm512_set_epi32(
        0, Offset + 7, 0, Offset + 6, 0, Offset + 5, 0, Offset + 4, 0,
        Offset + 3, 0, Offset + 2, 0, Offset + 1, 0, Offset);
    const __m512i idx_second = _mm512_set_epi32(
        0, Offset - 1, 0, Offset - 2, 0, Offset - 3, 0, Offset - 4, 0,
        Offset - 5, 0, Offset - 6, 0, Offset - 7, 0, Offset - 8);

    unsigned char blend_mask = 0xFF;
    blend_mask <<= (8 - Offset);

    const Packet8d from_first = _mm512_permutexvar_pd(idx_first, first);
    const Packet8d from_second = _mm512_permutexvar_pd(idx_second, second);
    first = _mm512_mask_blend_pd(blend_mask, from_first, from_second);
  }
};
#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \

View File

@ -159,22 +159,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst<Packet2cf>(prod);
}
// palign for Packet2cf: only Offset==1 does anything, an 8-byte vec_sld on the wrapped
// `v` members. The operand order depends on _BIG_ENDIAN.
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset!=1) return;
#ifdef _BIG_ENDIAN
    first.v = vec_sld(first.v, second.v, 8);
#else
    first.v = vec_sld(second.v, first.v, 8);
#endif
  }
};
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@ -346,16 +330,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
// palign for Packet1cd: intentionally a no-op for every offset.
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
  static EIGEN_STRONG_INLINE void run(Packet1cd&, const Packet1cd&)
  {
    // FIXME: is it certain that a Packet1cd never needs to be aligned?
    // A std::complex<double> takes 16 bytes, but that does not guarantee it sits on a
    // 16-byte boundary...
  }
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const

View File

@ -1524,176 +1524,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
return vec_any_ne(x, pzero(x));
}
// palign for Packet4f: a vec_sld whose byte count and operand order depend on
// Offset % 4 and on _BIG_ENDIAN. A remainder of 0 leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
  {
    const int lanes = Offset % 4;
#ifdef _BIG_ENDIAN
    if      (lanes == 1) first = vec_sld(first, second, 4);
    else if (lanes == 2) first = vec_sld(first, second, 8);
    else if (lanes == 3) first = vec_sld(first, second, 12);
#else
    if      (lanes == 1) first = vec_sld(second, first, 12);
    else if (lanes == 2) first = vec_sld(second, first, 8);
    else if (lanes == 3) first = vec_sld(second, first, 4);
#endif
  }
};
// palign for Packet4i: a vec_sld whose byte count and operand order depend on
// Offset % 4 and on _BIG_ENDIAN. A remainder of 0 leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
  {
    const int lanes = Offset % 4;
#ifdef _BIG_ENDIAN
    if      (lanes == 1) first = vec_sld(first, second, 4);
    else if (lanes == 2) first = vec_sld(first, second, 8);
    else if (lanes == 3) first = vec_sld(first, second, 12);
#else
    if      (lanes == 1) first = vec_sld(second, first, 12);
    else if (lanes == 2) first = vec_sld(second, first, 8);
    else if (lanes == 3) first = vec_sld(second, first, 4);
#endif
  }
};
// palign for Packet8s: a vec_sld whose byte count and operand order depend on
// Offset % 8 and on _BIG_ENDIAN. A remainder of 0 leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet8s>
{
  static EIGEN_STRONG_INLINE void run(Packet8s& first, const Packet8s& second)
  {
    const int lanes = Offset % 8;
#ifdef _BIG_ENDIAN
    if      (lanes == 1) first = vec_sld(first, second, 2);
    else if (lanes == 2) first = vec_sld(first, second, 4);
    else if (lanes == 3) first = vec_sld(first, second, 6);
    else if (lanes == 4) first = vec_sld(first, second, 8);
    else if (lanes == 5) first = vec_sld(first, second, 10);
    else if (lanes == 6) first = vec_sld(first, second, 12);
    else if (lanes == 7) first = vec_sld(first, second, 14);
#else
    if      (lanes == 1) first = vec_sld(second, first, 14);
    else if (lanes == 2) first = vec_sld(second, first, 12);
    else if (lanes == 3) first = vec_sld(second, first, 10);
    else if (lanes == 4) first = vec_sld(second, first, 8);
    else if (lanes == 5) first = vec_sld(second, first, 6);
    else if (lanes == 6) first = vec_sld(second, first, 4);
    else if (lanes == 7) first = vec_sld(second, first, 2);
#endif
  }
};
// palign for Packet8us: a vec_sld whose byte count and operand order depend on
// Offset % 8 and on _BIG_ENDIAN. A remainder of 0 leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet8us>
{
  static EIGEN_STRONG_INLINE void run(Packet8us& first, const Packet8us& second)
  {
    const int lanes = Offset % 8;
#ifdef _BIG_ENDIAN
    if      (lanes == 1) first = vec_sld(first, second, 2);
    else if (lanes == 2) first = vec_sld(first, second, 4);
    else if (lanes == 3) first = vec_sld(first, second, 6);
    else if (lanes == 4) first = vec_sld(first, second, 8);
    else if (lanes == 5) first = vec_sld(first, second, 10);
    else if (lanes == 6) first = vec_sld(first, second, 12);
    else if (lanes == 7) first = vec_sld(first, second, 14);
#else
    if      (lanes == 1) first = vec_sld(second, first, 14);
    else if (lanes == 2) first = vec_sld(second, first, 12);
    else if (lanes == 3) first = vec_sld(second, first, 10);
    else if (lanes == 4) first = vec_sld(second, first, 8);
    else if (lanes == 5) first = vec_sld(second, first, 6);
    else if (lanes == 6) first = vec_sld(second, first, 4);
    else if (lanes == 7) first = vec_sld(second, first, 2);
#endif
  }
};
// palign for Packet16c: a vec_sld by (Offset % 16) bytes.
// The little-endian branch used to be a verbatim copy of the big-endian one. It now
// follows the convention of the other specializations in this file (operands swapped,
// complementary byte count).
template<int Offset>
struct palign_impl<Offset,Packet16c>
{
  static EIGEN_STRONG_INLINE void run(Packet16c& first, const Packet16c& second)
  {
    // A remainder of 0 leaves `first` unchanged.
    const int shift = Offset % 16;
    if ( shift == 0 ) return;
#ifdef _BIG_ENDIAN
    first = vec_sld(first, second, shift);
#else
    // The trailing "% 16" only keeps the immediate inside [0,15] for the shift==0
    // instantiation, where this statement is compiled but never executed.
    first = vec_sld(second, first, (16 - shift) % 16);
#endif
  }
};
// palign for Packet16uc: a vec_sld by (Offset % 16) bytes.
// The little-endian branch used to be a verbatim copy of the big-endian one. It now
// follows the convention of the other specializations in this file (operands swapped,
// complementary byte count).
template<int Offset>
struct palign_impl<Offset,Packet16uc>
{
  static EIGEN_STRONG_INLINE void run(Packet16uc& first, const Packet16uc& second)
  {
    // A remainder of 0 leaves `first` unchanged.
    const int shift = Offset % 16;
    if ( shift == 0 ) return;
#ifdef _BIG_ENDIAN
    first = vec_sld(first, second, shift);
#else
    // The trailing "% 16" only keeps the immediate inside [0,15] for the shift==0
    // instantiation, where this statement is compiled but never executed.
    first = vec_sld(second, first, (16 - shift) % 16);
#endif
  }
};
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
Packet4f t0, t1, t2, t3;
@ -2362,20 +2192,6 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
return pfirst(pmax(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4ui>(a), reinterpret_cast<Packet4ui>(a), 8))));
}
// palign for Packet2d: only Offset==1 does anything, an 8-byte vec_sld performed on the
// packets reinterpreted as Packet4ui. The operand order depends on _BIG_ENDIAN.
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset != 1) return;
    const Packet4ui first_bits  = reinterpret_cast<Packet4ui>(first);
    const Packet4ui second_bits = reinterpret_cast<Packet4ui>(second);
#ifdef _BIG_ENDIAN
    first = reinterpret_cast<Packet2d>(vec_sld(first_bits, second_bits, 8));
#else
    first = reinterpret_cast<Packet2d>(vec_sld(second_bits, first_bits, 8));
#endif
  }
};
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2d,2>& kernel) {
Packet2d t0, t1;

View File

@ -305,15 +305,6 @@ EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a
(a.v[0] * a.v[3]) + (a.v[1] * a.v[2]));
}
// palign for Packet2cf: only Offset==1 does anything, a __builtin_msa_sldi_b on the
// wrapped `v` members with a byte count of Offset*8.
template <int Offset>
struct palign_impl<Offset, Packet2cf> {
  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) {
    if (Offset != 1) return;
    const v16i8 first_bytes = (v16i8)first.v;
    const v16i8 second_bytes = (v16i8)second.v;
    first.v = (Packet4f)__builtin_msa_sldi_b(second_bytes, first_bytes, Offset * 8);
  }
};
template <>
struct conj_helper<Packet2cf, Packet2cf, false, true> {
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y,
@ -653,15 +644,6 @@ EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd&
return pfirst(a);
}
// palign for Packet1cd: intentionally a no-op for every offset.
template <int Offset>
struct palign_impl<Offset, Packet1cd> {
  static EIGEN_STRONG_INLINE void run(Packet1cd&, const Packet1cd&) {
    // FIXME: is it certain that a Packet1cd never needs to be aligned?
    // A std::complex<double> takes 16 bytes, but that does not guarantee it sits on a
    // 16-byte boundary...
  }
};
template <>
struct conj_helper<Packet1cd, Packet1cd, false, true> {
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y,

View File

@ -675,25 +675,6 @@ EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {
return m[0];
}
// PALIGN_MSA(SHIFT, PACKET, INTRIN) emits the full specialization palign_impl<SHIFT, PACKET>.
// Its run() applies INTRIN to both packets viewed as v16i8, with a byte count of SHIFT * 4,
// and does nothing when SHIFT is 0.
#define PALIGN_MSA(SHIFT, PACKET, INTRIN)                                        \
  template <>                                                                    \
  struct palign_impl<SHIFT, PACKET> {                                            \
    EIGEN_STRONG_INLINE static void run(PACKET& first, const PACKET& second) {   \
      if (SHIFT != 0) {                                                          \
        first = (PACKET)(INTRIN((v16i8)second, (v16i8)first, SHIFT * 4));        \
      }                                                                          \
    }                                                                            \
  };

// Packet4f, offsets 0 to 3.
PALIGN_MSA(0, Packet4f, __builtin_msa_sldi_b)
PALIGN_MSA(1, Packet4f, __builtin_msa_sldi_b)
PALIGN_MSA(2, Packet4f, __builtin_msa_sldi_b)
PALIGN_MSA(3, Packet4f, __builtin_msa_sldi_b)
// Packet4i, offsets 0 to 3.
PALIGN_MSA(0, Packet4i, __builtin_msa_sldi_b)
PALIGN_MSA(1, Packet4i, __builtin_msa_sldi_b)
PALIGN_MSA(2, Packet4i, __builtin_msa_sldi_b)
PALIGN_MSA(3, Packet4i, __builtin_msa_sldi_b)

#undef PALIGN_MSA
inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
os << "[ " << value.packet[0] << "," << std::endl
<< " " << value.packet[1] << "," << std::endl
@ -1168,19 +1149,6 @@ EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
#endif
}
// PALIGN_MSA(SHIFT, PACKET, INTRIN) emits the full specialization palign_impl<SHIFT, PACKET>.
// Its run() applies INTRIN to both packets viewed as v16i8, with a byte count of SHIFT * 8,
// and does nothing when SHIFT is 0.
#define PALIGN_MSA(SHIFT, PACKET, INTRIN)                                        \
  template <>                                                                    \
  struct palign_impl<SHIFT, PACKET> {                                            \
    EIGEN_STRONG_INLINE static void run(PACKET& first, const PACKET& second) {   \
      if (SHIFT != 0) {                                                          \
        first = (PACKET)(INTRIN((v16i8)second, (v16i8)first, SHIFT * 8));        \
      }                                                                          \
    }                                                                            \
  };

// Packet2d, offsets 0 and 1.
PALIGN_MSA(0, Packet2d, __builtin_msa_sldi_b)
PALIGN_MSA(1, Packet2d, __builtin_msa_sldi_b)

#undef PALIGN_MSA
inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
return os;

View File

@ -340,16 +340,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return s;
}
// palign for Packet2cf: only Offset==1 does anything, a vextq_f32 by 2 floats on the
// wrapped `v` members.
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
  EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset != 1) return;
    first.v = vextq_f32(first.v, second.v, 2);
  }
};
template<> struct conj_helper<Packet1cf,Packet1cf,false,true>
{
EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& x, const Packet1cf& y, const Packet1cf& c) const
@ -602,16 +592,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Pack
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
// palign for Packet1cd: intentionally a no-op for every offset.
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
  static EIGEN_STRONG_INLINE void run(Packet1cd&, const Packet1cd&)
  {
    // FIXME: is it certain that a Packet1cd never needs to be aligned?
    // A std::complex<double> takes 16 bytes, but that does not guarantee it sits on a
    // 16-byte boundary...
  }
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const

View File

@ -2708,147 +2708,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
return vget_lane_u32(vpmax_u32(tmp, tmp), 0);
}
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
//
// PALIGN_NEON(Offset,Type,Command) emits the full specialization palign_impl<Offset,Type>.
// Its run() performs first = Command(first, second, Offset), skipped when Offset is 0.
//
// The last line of the macro carries no line continuation, so the definition ends there
// whatever the next line is; it used to rely on being followed by an empty line.
#define PALIGN_NEON(Offset,Type,Command) \
template<>\
struct palign_impl<Offset,Type>\
{\
  EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
  {\
    if (Offset!=0)\
      first = Command(first, second, Offset);\
  }\
};
// Emulates an "extract" on two values handled as 32-bit words: the result is made of
// `first` shifted right by n bytes, OR-ed with `second` shifted left by (4 - n) bytes.
// n is expected to be in [0,3].
template<typename T>
EIGEN_STRONG_INLINE T palign_4c(const T& first, const T &second, const int n)
{
  // For n == 0 the left shift below would be a 32-bit shift of a 32-bit value, which is
  // undefined behaviour; the expected result is simply `first`.
  if (n == 0) return first;
  const uint32_t low_part  = static_cast<uint32_t>(first) >> (n * 8);
  const uint32_t high_part = static_cast<uint32_t>(second) << (32 - n * 8);
  return static_cast<T>(high_part | low_part);
}
// Explicit palign_impl specializations: one PALIGN_NEON line per (offset, packet type)
// pair, listing every offset from 0 up to the last one instantiated for that type.
// The third argument is the function applied by run().

// Packet2f / Packet4f
PALIGN_NEON(0, Packet2f, vext_f32)
PALIGN_NEON(1, Packet2f, vext_f32)
PALIGN_NEON(0, Packet4f, vextq_f32)
PALIGN_NEON(1, Packet4f, vextq_f32)
PALIGN_NEON(2, Packet4f, vextq_f32)
PALIGN_NEON(3, Packet4f, vextq_f32)
// Packet4c goes through the palign_4c helper instead of a vext intrinsic
PALIGN_NEON(0, Packet4c, palign_4c)
PALIGN_NEON(1, Packet4c, palign_4c)
PALIGN_NEON(2, Packet4c, palign_4c)
PALIGN_NEON(3, Packet4c, palign_4c)
// Packet8c / Packet16c
PALIGN_NEON(0, Packet8c, vext_s8)
PALIGN_NEON(1, Packet8c, vext_s8)
PALIGN_NEON(2, Packet8c, vext_s8)
PALIGN_NEON(3, Packet8c, vext_s8)
PALIGN_NEON(4, Packet8c, vext_s8)
PALIGN_NEON(5, Packet8c, vext_s8)
PALIGN_NEON(6, Packet8c, vext_s8)
PALIGN_NEON(7, Packet8c, vext_s8)
PALIGN_NEON(0, Packet16c, vextq_s8)
PALIGN_NEON(1, Packet16c, vextq_s8)
PALIGN_NEON(2, Packet16c, vextq_s8)
PALIGN_NEON(3, Packet16c, vextq_s8)
PALIGN_NEON(4, Packet16c, vextq_s8)
PALIGN_NEON(5, Packet16c, vextq_s8)
PALIGN_NEON(6, Packet16c, vextq_s8)
PALIGN_NEON(7, Packet16c, vextq_s8)
PALIGN_NEON(8, Packet16c, vextq_s8)
PALIGN_NEON(9, Packet16c, vextq_s8)
PALIGN_NEON(10, Packet16c, vextq_s8)
PALIGN_NEON(11, Packet16c, vextq_s8)
PALIGN_NEON(12, Packet16c, vextq_s8)
PALIGN_NEON(13, Packet16c, vextq_s8)
PALIGN_NEON(14, Packet16c, vextq_s8)
PALIGN_NEON(15, Packet16c, vextq_s8)
// Packet4uc goes through the palign_4c helper instead of a vext intrinsic
PALIGN_NEON(0, Packet4uc, palign_4c)
PALIGN_NEON(1, Packet4uc, palign_4c)
PALIGN_NEON(2, Packet4uc, palign_4c)
PALIGN_NEON(3, Packet4uc, palign_4c)
// Packet8uc / Packet16uc
PALIGN_NEON(0, Packet8uc, vext_u8)
PALIGN_NEON(1, Packet8uc, vext_u8)
PALIGN_NEON(2, Packet8uc, vext_u8)
PALIGN_NEON(3, Packet8uc, vext_u8)
PALIGN_NEON(4, Packet8uc, vext_u8)
PALIGN_NEON(5, Packet8uc, vext_u8)
PALIGN_NEON(6, Packet8uc, vext_u8)
PALIGN_NEON(7, Packet8uc, vext_u8)
PALIGN_NEON(0, Packet16uc, vextq_u8)
PALIGN_NEON(1, Packet16uc, vextq_u8)
PALIGN_NEON(2, Packet16uc, vextq_u8)
PALIGN_NEON(3, Packet16uc, vextq_u8)
PALIGN_NEON(4, Packet16uc, vextq_u8)
PALIGN_NEON(5, Packet16uc, vextq_u8)
PALIGN_NEON(6, Packet16uc, vextq_u8)
PALIGN_NEON(7, Packet16uc, vextq_u8)
PALIGN_NEON(8, Packet16uc, vextq_u8)
PALIGN_NEON(9, Packet16uc, vextq_u8)
PALIGN_NEON(10, Packet16uc, vextq_u8)
PALIGN_NEON(11, Packet16uc, vextq_u8)
PALIGN_NEON(12, Packet16uc, vextq_u8)
PALIGN_NEON(13, Packet16uc, vextq_u8)
PALIGN_NEON(14, Packet16uc, vextq_u8)
PALIGN_NEON(15, Packet16uc, vextq_u8)
// Packet4s / Packet8s
PALIGN_NEON(0, Packet4s, vext_s16)
PALIGN_NEON(1, Packet4s, vext_s16)
PALIGN_NEON(2, Packet4s, vext_s16)
PALIGN_NEON(3, Packet4s, vext_s16)
PALIGN_NEON(0, Packet8s, vextq_s16)
PALIGN_NEON(1, Packet8s, vextq_s16)
PALIGN_NEON(2, Packet8s, vextq_s16)
PALIGN_NEON(3, Packet8s, vextq_s16)
PALIGN_NEON(4, Packet8s, vextq_s16)
PALIGN_NEON(5, Packet8s, vextq_s16)
PALIGN_NEON(6, Packet8s, vextq_s16)
PALIGN_NEON(7, Packet8s, vextq_s16)
// Packet4us / Packet8us
PALIGN_NEON(0, Packet4us, vext_u16)
PALIGN_NEON(1, Packet4us, vext_u16)
PALIGN_NEON(2, Packet4us, vext_u16)
PALIGN_NEON(3, Packet4us, vext_u16)
PALIGN_NEON(0, Packet8us, vextq_u16)
PALIGN_NEON(1, Packet8us, vextq_u16)
PALIGN_NEON(2, Packet8us, vextq_u16)
PALIGN_NEON(3, Packet8us, vextq_u16)
PALIGN_NEON(4, Packet8us, vextq_u16)
PALIGN_NEON(5, Packet8us, vextq_u16)
PALIGN_NEON(6, Packet8us, vextq_u16)
PALIGN_NEON(7, Packet8us, vextq_u16)
// Packet2i / Packet4i
PALIGN_NEON(0, Packet2i, vext_s32)
PALIGN_NEON(1, Packet2i, vext_s32)
PALIGN_NEON(0, Packet4i, vextq_s32)
PALIGN_NEON(1, Packet4i, vextq_s32)
PALIGN_NEON(2, Packet4i, vextq_s32)
PALIGN_NEON(3, Packet4i, vextq_s32)
// Packet2ui / Packet4ui
PALIGN_NEON(0, Packet2ui, vext_u32)
PALIGN_NEON(1, Packet2ui, vext_u32)
PALIGN_NEON(0, Packet4ui, vextq_u32)
PALIGN_NEON(1, Packet4ui, vextq_u32)
PALIGN_NEON(2, Packet4ui, vextq_u32)
PALIGN_NEON(3, Packet4ui, vextq_u32)
// Packet2l / Packet2ul
PALIGN_NEON(0, Packet2l, vextq_s64)
PALIGN_NEON(1, Packet2l, vextq_s64)
PALIGN_NEON(0, Packet2ul, vextq_u64)
PALIGN_NEON(1, Packet2ul, vextq_u64)
// The helper macro is only meant for the list above.
#undef PALIGN_NEON
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2f, 2>& kernel)
{
const float32x2x2_t z = vzip_f32(kernel.packet[0], kernel.packet[1]);
@ -3563,22 +3422,6 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
{ return vgetq_lane_f64(vpmaxq_f64(a,a), 0); }
// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors,
// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074
#define PALIGN_NEON(Offset,Type,Command) \
template<>\
struct palign_impl<Offset,Type>\
{\
EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\
{\
if (Offset!=0)\
first = Command(first, second, Offset);\
}\
};\
PALIGN_NEON(0, Packet2d, vextq_f64)
PALIGN_NEON(1, Packet2d, vextq_f64)
#undef PALIGN_NEON
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2d, 2>& kernel)

View File

@ -161,19 +161,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
}
// palign for Packet2cf: only Offset==1 does anything, a movehl followed by a movelh on
// the wrapped `v` members.
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset!=1) return;
    first.v = _mm_movehl_ps(first.v, first.v);
    first.v = _mm_movelh_ps(first.v, second.v);
  }
};
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@ -346,16 +333,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
return pfirst(a);
}
// palign for Packet1cd: intentionally a no-op for every offset.
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
  static EIGEN_STRONG_INLINE void run(Packet1cd&, const Packet1cd&)
  {
    // FIXME: is it certain that a Packet1cd never needs to be aligned?
    // A std::complex<double> takes 16 bytes, but that does not guarantee it sits on a
    // 16-byte boundary...
  }
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const

View File

@ -867,114 +867,6 @@ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
return _mm_movemask_ps(x) != 0x0;
}
#if EIGEN_COMP_GNUC
// template <> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
// {
// Packet4f res = b;
// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
// return res;
// }
// EIGEN_STRONG_INLINE Packet4i _mm_alignr_epi8(const Packet4i& a, const Packet4i& b, const int i)
// {
// Packet4i res = a;
// asm("palignr %[i], %[a], %[b] " : [b] "+x" (res) : [a] "x" (a), [i] "i" (i));
// return res;
// }
#endif
#ifdef EIGEN_VECTORIZE_SSSE3
// SSSE3 versions
// SSSE3 palign for Packet4f: a single _mm_alignr_epi8 on the integer view of both
// packets, with a byte count of Offset*4.
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
  {
    // Nothing to do for a zero offset.
    if (Offset==0) return;
    const __m128i first_bits  = _mm_castps_si128(first);
    const __m128i second_bits = _mm_castps_si128(second);
    first = _mm_castsi128_ps(_mm_alignr_epi8(second_bits, first_bits, Offset*4));
  }
};
// SSSE3 palign for Packet4i: a single _mm_alignr_epi8 with a byte count of Offset*4.
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
  {
    // Nothing to do for a zero offset.
    if (Offset==0) return;
    first = _mm_alignr_epi8(second, first, Offset*4);
  }
};
// SSSE3 palign for Packet2d: only Offset==1 does anything, an 8-byte _mm_alignr_epi8 on
// the integer view of both packets.
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset!=1) return;
    const __m128i first_bits  = _mm_castpd_si128(first);
    const __m128i second_bits = _mm_castpd_si128(second);
    first = _mm_castsi128_pd(_mm_alignr_epi8(second_bits, first_bits, 8));
  }
};
#else
// SSE2 versions
// SSE2 palign for Packet4f: offsets 1 to 3 each use a dedicated two-instruction
// sequence; any other offset leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
  {
    switch (Offset)
    {
      case 1:
        first = _mm_move_ss(first,second);
        first = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(first),0x39));
        break;
      case 2:
        first = _mm_movehl_ps(first,first);
        first = _mm_movelh_ps(first,second);
        break;
      case 3:
        first = _mm_move_ss(first,second);
        first = _mm_shuffle_ps(first,second,0x93);
        break;
      default:
        break;
    }
  }
};
// SSE2 palign for Packet4i: same sequences as the Packet4f version, applied through
// float/integer casts. Offsets other than 1 to 3 leave `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
  {
    switch (Offset)
    {
      case 1:
        first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
        first = _mm_shuffle_epi32(first,0x39);
        break;
      case 2:
        first = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(first)));
        first = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
        break;
      case 3:
        first = _mm_castps_si128(_mm_move_ss(_mm_castsi128_ps(first),_mm_castsi128_ps(second)));
        first = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(first),_mm_castsi128_ps(second),0x93));
        break;
      default:
        break;
    }
  }
};
// SSE2 palign for Packet2d: only Offset==1 does anything, a movehl followed by a movelh
// on the float view of the packets.
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset!=1) return;
    first = _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(first),_mm_castpd_ps(first)));
    first = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(first),_mm_castpd_ps(second)));
  }
};
#endif
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4f,4>& kernel) {
_MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);

View File

@ -160,16 +160,6 @@ template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const
{
return pfirst(a);
}
// palign for Packet1cd: intentionally a no-op for every offset.
template<int Offset>
struct palign_impl<Offset,Packet1cd>
{
  static EIGEN_STRONG_INLINE void run(Packet1cd&, const Packet1cd&)
  {
    // FIXME: is it certain that a Packet1cd never needs to be aligned?
    // A std::complex<double> takes 16 bytes, but that does not guarantee it sits on a
    // 16-byte boundary...
  }
};
template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
@ -331,18 +321,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return res;
}
// palign for Packet2cf (variant holding its data in the `cd` array): only Offset==1
// does anything. Slot 1 of `first` moves to slot 0, and slot 0 of `second` becomes slot 1.
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset != 1) return;
    first.cd[0] = first.cd[1];
    first.cd[1] = second.cd[0];
  }
};
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
@ -457,18 +435,6 @@ template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const P
return pfirst<Packet2cf>(prod);
}
// palign for Packet2cf: only Offset==1 does anything, an 8-byte vec_sld on the wrapped
// `v` members.
template<int Offset>
struct palign_impl<Offset,Packet2cf>
{
  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
  {
    if (Offset!=1) return;
    first.v = vec_sld(first.v, second.v, 8);
  }
};
template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const

View File

@ -298,33 +298,6 @@ inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
}
#endif
// palign for Packet4i: a vec_sld whose byte count is four times Offset % 4.
// A remainder of 0 leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet4i>
{
  static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second)
  {
    const int lanes = Offset % 4;
    if      (lanes == 1) first = vec_sld(first, second, 4);
    else if (lanes == 2) first = vec_sld(first, second, 8);
    else if (lanes == 3) first = vec_sld(first, second, 12);
  }
};
// palign for Packet2d: only Offset==1 does anything, an 8-byte vec_sld performed on the
// packets reinterpreted as Packet4i.
template<int Offset>
struct palign_impl<Offset,Packet2d>
{
  static EIGEN_STRONG_INLINE void run(Packet2d& first, const Packet2d& second)
  {
    if (Offset != 1) return;
    const Packet4i first_bits  = reinterpret_cast<Packet4i>(first);
    const Packet4i second_bits = reinterpret_cast<Packet4i>(second);
    first = reinterpret_cast<Packet2d>(vec_sld(first_bits, second_bits, 8));
  }
};
template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
{
// FIXME: No intrinsic yet
@ -636,30 +609,6 @@ template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Pack
return splat;
}
/* This one is tricky: the alignment is expressed in floats, but it has to be carried out
 * on the two halves v4f[0] and v4f[1] of the packet, whose elements have the size of a
 * double. Odd offsets therefore use 8-byte vec_sld shifts across neighbouring halves,
 * while an offset of 2 only moves whole halves. A remainder of 0 changes nothing.
 */
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
  {
    const int lanes = Offset % 4;
    if (lanes == 1)
    {
      // v4f[0] must be written first: it reads the old value of v4f[1].
      first.v4f[0] = vec_sld(first.v4f[0], first.v4f[1], 8);
      first.v4f[1] = vec_sld(first.v4f[1], second.v4f[0], 8);
    }
    else if (lanes == 2)
    {
      first.v4f[0] = first.v4f[1];
      first.v4f[1] = second.v4f[0];
    }
    else if (lanes == 3)
    {
      // v4f[0] must be written first: it reads the old value of v4f[1].
      first.v4f[0] = vec_sld(first.v4f[1], second.v4f[0], 8);
      first.v4f[1] = vec_sld(second.v4f[0], second.v4f[1], 8);
    }
  }
};
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
// FIXME: No intrinsic yet
@ -942,22 +891,6 @@ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, cons
return result;
}
#else
// palign for Packet4f: a vec_sld whose byte count is four times Offset % 4.
// A remainder of 0 leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet4f>
{
  static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second)
  {
    const int lanes = Offset % 4;
    if      (lanes == 1) first = vec_sld(first, second, 4);
    else if (lanes == 2) first = vec_sld(first, second, 8);
    else if (lanes == 3) first = vec_sld(first, second, 12);
  }
};
template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
{
// FIXME: No intrinsic yet

View File

@ -103,7 +103,6 @@ template<typename Scalar,typename Packet> void packetmath()
EIGEN_ALIGN_MAX Scalar data1[size];
EIGEN_ALIGN_MAX Scalar data2[size];
EIGEN_ALIGN_MAX Scalar data3[size];
EIGEN_ALIGN_MAX Packet packets[PacketSize*2];
EIGEN_ALIGN_MAX Scalar ref[size];
RealScalar refvalue = RealScalar(0);
for (int i=0; i<size; ++i)
@ -163,38 +162,6 @@ template<typename Scalar,typename Packet> void packetmath()
}
}
// Checks internal::palign<> for every offset in [0, PacketSize).
// The offset is a template argument, so each run-time value is dispatched by hand to the
// matching instantiation. The helper macro clamps the compile-time offset to PacketSize-1
// so that no branch instantiates palign with an offset beyond the packet. It has fully
// parenthesized arguments, a test-specific name, and is undefined right after the loop.
#define EIGEN_TEST_PALIGN_CLAMP(A) ((A) < (PacketSize-1) ? (A) : (PacketSize-1))
for (int offset=0; offset<PacketSize; ++offset)
{
  packets[0] = internal::pload<Packet>(data1);
  packets[1] = internal::pload<Packet>(data1+PacketSize);
  if (offset==0) internal::palign<0>(packets[0], packets[1]);
  else if (offset==1) internal::palign<EIGEN_TEST_PALIGN_CLAMP(1)>(packets[0], packets[1]);
  else if (offset==2) internal::palign<EIGEN_TEST_PALIGN_CLAMP(2)>(packets[0], packets[1]);
  else if (offset==3) internal::palign<EIGEN_TEST_PALIGN_CLAMP(3)>(packets[0], packets[1]);
  else if (offset==4) internal::palign<EIGEN_TEST_PALIGN_CLAMP(4)>(packets[0], packets[1]);
  else if (offset==5) internal::palign<EIGEN_TEST_PALIGN_CLAMP(5)>(packets[0], packets[1]);
  else if (offset==6) internal::palign<EIGEN_TEST_PALIGN_CLAMP(6)>(packets[0], packets[1]);
  else if (offset==7) internal::palign<EIGEN_TEST_PALIGN_CLAMP(7)>(packets[0], packets[1]);
  else if (offset==8) internal::palign<EIGEN_TEST_PALIGN_CLAMP(8)>(packets[0], packets[1]);
  else if (offset==9) internal::palign<EIGEN_TEST_PALIGN_CLAMP(9)>(packets[0], packets[1]);
  else if (offset==10) internal::palign<EIGEN_TEST_PALIGN_CLAMP(10)>(packets[0], packets[1]);
  else if (offset==11) internal::palign<EIGEN_TEST_PALIGN_CLAMP(11)>(packets[0], packets[1]);
  else if (offset==12) internal::palign<EIGEN_TEST_PALIGN_CLAMP(12)>(packets[0], packets[1]);
  else if (offset==13) internal::palign<EIGEN_TEST_PALIGN_CLAMP(13)>(packets[0], packets[1]);
  else if (offset==14) internal::palign<EIGEN_TEST_PALIGN_CLAMP(14)>(packets[0], packets[1]);
  else if (offset==15) internal::palign<EIGEN_TEST_PALIGN_CLAMP(15)>(packets[0], packets[1]);
  internal::pstore(data2, packets[0]);

  // Expected result: the PacketSize scalars of data1 starting at `offset`.
  for (int i=0; i<PacketSize; ++i)
    ref[i] = data1[i+offset];

  // palign is not used anymore, so let's just put a warning if it fails
  ++g_test_level;
  VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::palign");
  --g_test_level;
}
#undef EIGEN_TEST_PALIGN_CLAMP
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd);
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);