MatrixProduct enhancements:

- Changes to Altivec/MatrixProduct
  Adapting code to gcc 10.
  Generic code style and performance enhancements.
  Adding PanelMode support.
  Adding stride/offset support.
Enabling float64, std::complex<float> and std::complex<double>.
  Fixing lack of symm_pack.
  Enabling mixedtypes.
- Adding std::complex tests to blasutil.
- Adding an implementation of storePacketBlock when Incr!= 1.
This commit is contained in:
Everton Constantino 2020-05-20 14:01:02 -03:00
parent 6568856275
commit 6fe88a3c9d
4 changed files with 3005 additions and 248 deletions

View File

@ -335,7 +335,7 @@ using std::ptrdiff_t;
#include "src/Core/CoreIterators.h"
#include "src/Core/ConditionEstimator.h"
#if EIGEN_ARCH_PPC
#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
#include "src/Core/arch/AltiVec/MatrixProduct.h"
#endif

File diff suppressed because it is too large Load Diff

View File

@ -391,6 +391,77 @@ public:
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
}
// Generic scalar-by-scalar store used by storePacketBlock.
// The helper for index `idx` first delegates to the helper for `idx-1` (so packets
// 0..idx-1 are written before packet idx) and then writes every lane `l` of
// block.packet[idx] to the mapper element (i+l, j+idx).
// Complex scalar types have their own specializations because their packets are
// read through the `.v` member instead of being indexed directly.
template<typename SubPacket, typename ScalarT, int n, int idx>
struct storePacketBlock_helper
{
  // Helper responsible for the packets that precede `idx`.
  storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh;

  // sup   : mapper whose operator() yields the destination element.
  // i, j  : base indices passed to the mapper.
  // block : packets to be written.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
    spbh.store(sup, i, j, block);

    const int packetSize = unpacket_traits<SubPacket>::size;
    for (int lane = 0; lane < packetSize; ++lane)
    {
      ScalarT& dst = (*sup)(i + lane, j + idx);
      dst = block.packet[idx][lane];
    }
  }
};
// std::complex<float> flavour of the scalar-by-scalar store.
// After delegating the preceding packets to the `idx-1` helper, each destination
// element (i+l, j+idx) receives its real part from block.packet[idx].v[2*l] and
// its imaginary part from block.packet[idx].v[2*l+1].
template<typename SubPacket, int n, int idx>
struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx>
{
  // Helper responsible for the packets that precede `idx`.
  storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
    spbh.store(sup, i, j, block);

    const int packetSize = unpacket_traits<SubPacket>::size;
    for (int lane = 0; lane < packetSize; ++lane)
    {
      std::complex<float>& dst = (*sup)(i + lane, j + idx);
      // Real and imaginary components sit next to each other in `.v`.
      dst.real(block.packet[idx].v[2*lane+0]);
      dst.imag(block.packet[idx].v[2*lane+1]);
    }
  }
};
// std::complex<double> flavour of the scalar-by-scalar store.
// After delegating the preceding packets to the `idx-1` helper, each destination
// element (i+l, j+idx) receives its real part from block.packet[idx].v[2*l] and
// its imaginary part from block.packet[idx].v[2*l+1].
template<typename SubPacket, int n, int idx>
struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx>
{
  // Helper responsible for the packets that precede `idx`.
  storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
    spbh.store(sup, i, j, block);

    const int packetSize = unpacket_traits<SubPacket>::size;
    for (int lane = 0; lane < packetSize; ++lane)
    {
      std::complex<double>& dst = (*sup)(i + lane, j + idx);
      // Real and imaginary components sit next to each other in `.v`.
      dst.real(block.packet[idx].v[2*lane+0]);
      dst.imag(block.packet[idx].v[2*lane+1]);
    }
  }
};
// Recursion terminator for the generic helper: idx == -1 means there is no packet
// left to write, so store() intentionally does nothing.
template<typename SubPacket, typename ScalarT, int n>
struct storePacketBlock_helper<SubPacket, ScalarT, n, -1>
{
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* /*sup*/, Index /*i*/, Index /*j*/, const PacketBlock<SubPacket, n>& /*block*/) const
  {
    // Nothing to store.
  }
};
// Recursion terminator for std::complex<float>.
// It has to be spelled out explicitly: <SubPacket, std::complex<float>, n, -1> would
// otherwise match both the std::complex<float> specialization (any idx) and the
// generic idx == -1 terminator, and neither of those is more specialized than the other.
template<typename SubPacket, int n>
struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1>
{
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* /*sup*/, Index /*i*/, Index /*j*/, const PacketBlock<SubPacket, n>& /*block*/) const
  {
    // Nothing to store.
  }
};
// Recursion terminator for std::complex<double>.
// It has to be spelled out explicitly: <SubPacket, std::complex<double>, n, -1> would
// otherwise match both the std::complex<double> specialization (any idx) and the
// generic idx == -1 terminator, and neither of those is more specialized than the other.
template<typename SubPacket, int n>
struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1>
{
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* /*sup*/, Index /*i*/, Index /*j*/, const PacketBlock<SubPacket, n>& /*block*/) const
  {
    // Nothing to store.
  }
};
// Stores the n packets of `block` through this mapper, starting at indices (i, j).
// The work is handed to storePacketBlock_helper, beginning with packet index n-1,
// which writes the values one scalar at a time. This approach is really quite slow
// compared to Incr=1 and should be avoided when possible.
template<typename SubPacket, int n>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
  typedef storePacketBlock_helper<SubPacket, Scalar, n, n-1> BlockWriter;
  BlockWriter().store(this, i, j, block);
}
protected:
Scalar* EIGEN_RESTRICT m_data;
const Index m_stride;

View File

@ -200,5 +200,7 @@ EIGEN_DECLARE_TEST(blasutil)
CALL_SUBTEST_5(run_test<float_t>());
CALL_SUBTEST_6(run_test<double_t>());
CALL_SUBTEST_7(run_test<std::complex<float> >());
CALL_SUBTEST_8(run_test<std::complex<double> >());
}
}