mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 11:19:02 +08:00
fix alignment issue in ploaddup for AVX512
This commit is contained in:
parent
e95696acb3
commit
626942d9dd
@ -466,7 +466,9 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) {
|
|||||||
// {a0, a0, a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7}
|
// {a0, a0, a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7}
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
|
EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) {
|
||||||
__m256i low_half = _mm256_load_si256(reinterpret_cast<const __m256i*>(from));
|
// an unaligned load is required here as there is no requirement
|
||||||
|
// on the alignment of input pointer 'from'
|
||||||
|
__m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from));
|
||||||
__m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half));
|
__m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half));
|
||||||
__m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0));
|
__m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
return pairs;
|
return pairs;
|
||||||
|
@ -227,6 +227,7 @@ template<typename Scalar> void packetmath()
|
|||||||
|
|
||||||
if(PacketSize>1)
|
if(PacketSize>1)
|
||||||
{
|
{
|
||||||
|
// apply different offsets to check that ploaddup is robust to unaligned inputs
|
||||||
for(int offset=0;offset<4;++offset)
|
for(int offset=0;offset<4;++offset)
|
||||||
{
|
{
|
||||||
for(int i=0;i<PacketSize/2;++i)
|
for(int i=0;i<PacketSize/2;++i)
|
||||||
@ -238,6 +239,7 @@ template<typename Scalar> void packetmath()
|
|||||||
|
|
||||||
if(PacketSize>2)
|
if(PacketSize>2)
|
||||||
{
|
{
|
||||||
|
// apply different offsets to check that ploadquad is robust to unaligned inputs
|
||||||
for(int offset=0;offset<4;++offset)
|
for(int offset=0;offset<4;++offset)
|
||||||
{
|
{
|
||||||
for(int i=0;i<PacketSize/4;++i)
|
for(int i=0;i<PacketSize/4;++i)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user