mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 02:33:59 +08:00
Add LoongArch64 architecture LSX support.(build/test )
This commit is contained in:
parent
c486af5ad3
commit
e986838464
@ -486,6 +486,12 @@ if (EIGEN_BUILD_TESTING)
|
||||
message(STATUS "Enabling MSA in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_LSX "Enable/Disable LSX in tests/examples" OFF)
|
||||
if(EIGEN_TEST_LSX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mlsx")
|
||||
message(STATUS "Enabling LSX in tests/examples")
|
||||
endif()
|
||||
|
||||
option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
|
||||
if(EIGEN_TEST_NEON)
|
||||
if(EIGEN_TEST_FMA)
|
||||
|
@ -234,6 +234,11 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/arch/NEON/TypeCasting.h"
|
||||
#include "src/Core/arch/NEON/MathFunctions.h"
|
||||
#include "src/Core/arch/NEON/Complex.h"
|
||||
#elif defined EIGEN_VECTORIZE_LSX
|
||||
#include "src/Core/arch/LSX/PacketMath.h"
|
||||
#include "src/Core/arch/LSX/TypeCasting.h"
|
||||
#include "src/Core/arch/LSX/MathFunctions.h"
|
||||
#include "src/Core/arch/LSX/Complex.h"
|
||||
#elif defined EIGEN_VECTORIZE_SVE
|
||||
#include "src/Core/arch/SVE/PacketMath.h"
|
||||
#include "src/Core/arch/SVE/TypeCasting.h"
|
||||
@ -381,6 +386,8 @@ using std::ptrdiff_t;
|
||||
#include "src/Core/arch/AltiVec/MatrixProduct.h"
|
||||
#elif defined EIGEN_VECTORIZE_NEON
|
||||
#include "src/Core/arch/NEON/GeneralBlockPanelKernel.h"
|
||||
#elif defined EIGEN_VECTORIZE_LSX
|
||||
#include "src/Core/arch/LSX/GeneralBlockPanelKernel.h"
|
||||
#endif
|
||||
|
||||
#if defined(EIGEN_VECTORIZE_AVX512)
|
||||
|
520
Eigen/src/Core/arch/LSX/Complex.h
Normal file
520
Eigen/src/Core/arch/LSX/Complex.h
Normal file
@ -0,0 +1,520 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2023 Zang Ruochen <zangruochen@loongson.cn>
|
||||
// Copyright (C) 2024 XiWei Gu <guxiwei-hf@loongson.cn>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_COMPLEX_LSX_H
|
||||
#define EIGEN_COMPLEX_LSX_H
|
||||
|
||||
// IWYU pragma: private
|
||||
#include "../../InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//---------- float ----------
|
||||
struct Packet2cf {
|
||||
EIGEN_STRONG_INLINE Packet2cf() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
|
||||
Packet4f v;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<std::complex<float> > : default_packet_traits {
|
||||
typedef Packet2cf type;
|
||||
typedef Packet2cf half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = 2,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasSqrt = 1,
|
||||
HasExp = 1,
|
||||
HasAbs = 0,
|
||||
HasLog = 1,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet2cf> {
|
||||
typedef std::complex<float> type;
|
||||
typedef Packet2cf half;
|
||||
typedef Packet4f as_real;
|
||||
enum {
|
||||
size = 2,
|
||||
alignment = Aligned16,
|
||||
vectorizable = true,
|
||||
masked_load_available = false,
|
||||
masked_store_available = false
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
return Packet2cf(__lsx_vfadd_s(a.v, b.v));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
return Packet2cf(__lsx_vfsub_s(a.v, b.v));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) {
|
||||
const uint32_t b[4] = {0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u};
|
||||
Packet4i mask = (Packet4i)__lsx_vld(b, 0);
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vxor_v((__m128i)a.v, mask);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) {
|
||||
const uint32_t b[4] = {0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u};
|
||||
Packet4i mask = (__m128i)__lsx_vld(b, 0);
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vxor_v((__m128i)a.v, mask);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
Packet4f part0_tmp = (Packet4f)__lsx_vfmul_s(a.v, b.v);
|
||||
Packet4f part0 = __lsx_vfsub_s(part0_tmp, (__m128)__lsx_vshuf4i_w(part0_tmp, 0x31));
|
||||
Packet4f part1_tmp = __lsx_vfmul_s((__m128)__lsx_vshuf4i_w(a.v, 0xb1), b.v);
|
||||
Packet4f part1 = __lsx_vfadd_s(part1_tmp, (__m128)__lsx_vshuf4i_w(part1_tmp, 0x31));
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vpackev_w((__m128i)part1, (__m128i)part0);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf ptrue<Packet2cf>(const Packet2cf& a) {
|
||||
return Packet2cf(ptrue(Packet4f(a.v)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vand_v((__m128i)a.v, (__m128i)b.v);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vor_v((__m128i)a.v, (__m128i)b.v);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vxor_v((__m128i)a.v, (__m128i)b.v);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vandn_v((__m128i)b.v, (__m128i)a.v);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) {
|
||||
float f0 = from.real(), f1 = from.imag();
|
||||
Packet4f re = {f0, f0, f0, f0};
|
||||
Packet4f im = {f1, f1, f1, f1};
|
||||
return Packet2cf((Packet4f)__lsx_vilvl_w((__m128i)im, (__m128i)re));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) {
|
||||
return pset1<Packet2cf>(*from);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet2cf& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from,
|
||||
Index stride) {
|
||||
Packet2cf res;
|
||||
__m128i tmp = __lsx_vldrepl_d(from, 0);
|
||||
__m128i tmp1 = __lsx_vldrepl_d(from + stride, 0);
|
||||
tmp = __lsx_vilvl_d(tmp1, tmp);
|
||||
res.v = (__m128)tmp;
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from,
|
||||
Index stride) {
|
||||
__lsx_vstelm_d((__m128i)from.v, to, 0, 0);
|
||||
__lsx_vstelm_d((__m128i)from.v, to + stride, 0, 1);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float>* addr) {
|
||||
__builtin_prefetch(addr);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) {
|
||||
EIGEN_ALIGN16 std::complex<float> res[2];
|
||||
__lsx_vst(a.v, res, 0);
|
||||
return res[0];
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) {
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vshuf4i_w(a.v, 0x4e);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a) {
|
||||
return pfirst(Packet2cf(__lsx_vfadd_s(a.v, vec4f_movehl(a.v, a.v))));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a) {
|
||||
return pfirst(pmul(a, Packet2cf(vec4f_movehl(a.v, a.v))));
|
||||
}
|
||||
|
||||
// Applies the (1, 0, 3, 2) swizzle to the packet, i.e. exchanges the two
// lanes inside each pair of floats.
EIGEN_STRONG_INLINE Packet2cf pcplxflip /* <Packet2cf> */ (const Packet2cf& x) {
  const Packet4f flipped = vec4f_swizzle1(x.v, 1, 0, 3, 2);
  return Packet2cf(flipped);
}
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf, Packet4f)
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
return pdiv_complex(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a) {
|
||||
return plog_complex(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pzero(const Packet2cf& /* a */) {
|
||||
__m128 v = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
return (Packet2cf)v;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pmadd<Packet2cf>(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) {
|
||||
Packet2cf result, t0, t1, t2;
|
||||
t1 = pzero(t1);
|
||||
t0.v = (__m128)__lsx_vpackev_w((__m128i)a.v, (__m128i)a.v);
|
||||
t2.v = __lsx_vfmadd_s(t0.v, b.v, c.v);
|
||||
result.v = __lsx_vfadd_s(t2.v, t1.v);
|
||||
t1.v = __lsx_vfsub_s(t1.v, a.v);
|
||||
t1.v = (__m128)__lsx_vpackod_w((__m128i)a.v, (__m128i)t1.v);
|
||||
t2.v = (__m128)__lsx_vshuf4i_w((__m128i)b.v, 0xb1);
|
||||
result.v = __lsx_vfmadd_s(t1.v, t2.v, result.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
|
||||
return pexp_complex(a);
|
||||
}
|
||||
|
||||
//---------- double ----------
|
||||
struct Packet1cd {
|
||||
EIGEN_STRONG_INLINE Packet1cd() {}
|
||||
EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
|
||||
Packet2d v;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<std::complex<double> > : default_packet_traits {
|
||||
typedef Packet1cd type;
|
||||
typedef Packet1cd half;
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 0,
|
||||
size = 1,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasMul = 1,
|
||||
HasDiv = 1,
|
||||
HasNegate = 1,
|
||||
HasSqrt = 1,
|
||||
HasAbs = 0,
|
||||
HasLog = 1,
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<Packet1cd> {
|
||||
typedef std::complex<double> type;
|
||||
typedef Packet1cd half;
|
||||
typedef Packet2d as_real;
|
||||
enum {
|
||||
size = 1,
|
||||
alignment = Aligned16,
|
||||
vectorizable = true,
|
||||
masked_load_available = false,
|
||||
masked_store_available = false
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
return Packet1cd(__lsx_vfadd_d(a.v, b.v));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
return Packet1cd(__lsx_vfsub_d(a.v, b.v));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
|
||||
return Packet1cd(pnegate(Packet2d(a.v)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
|
||||
const uint64_t tmp[2] = {0x0000000000000000u, 0x8000000000000000u};
|
||||
__m128i mask = __lsx_vld(tmp, 0);
|
||||
Packet1cd res;
|
||||
res.v = (Packet2d)__lsx_vxor_v((__m128i)a.v, mask);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
Packet2d tmp_real = __lsx_vfmul_d(a.v, b.v);
|
||||
Packet2d real = __lsx_vfsub_d(tmp_real, preverse(tmp_real));
|
||||
|
||||
Packet2d tmp_imag = __lsx_vfmul_d(preverse(a.v), b.v);
|
||||
Packet2d imag = (__m128d)__lsx_vfadd_d((__m128d)tmp_imag, preverse(tmp_imag));
|
||||
Packet1cd res;
|
||||
res.v = (__m128d)__lsx_vilvl_d((__m128i)imag, (__m128i)real);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd ptrue<Packet1cd>(const Packet1cd& a) {
|
||||
return Packet1cd(ptrue(Packet2d(a.v)));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
Packet1cd res;
|
||||
res.v = (Packet2d)__lsx_vand_v((__m128i)a.v, (__m128i)b.v);
|
||||
return res;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
Packet1cd res;
|
||||
res.v = (Packet2d)__lsx_vor_v((__m128i)a.v, (__m128i)b.v);
|
||||
return res;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
Packet1cd res;
|
||||
res.v = (Packet2d)__lsx_vxor_v((__m128i)a.v, (__m128i)b.v);
|
||||
return res;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
Packet1cd res;
|
||||
res.v = (Packet2d)__lsx_vandn_v((__m128i)b.v, (__m128i)a.v);
|
||||
return res;
|
||||
}
|
||||
|
||||
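// FIXME(review): the original note read "force unaligned load, this is a temporary fix",
// but the function below performs an aligned load via pload<Packet2d> -- confirm which is intended.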
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd
|
||||
pset1<Packet1cd>(const std::complex<double>& from) { /* here we really have to use unaligned loads :( */
|
||||
return ploadu<Packet1cd>(&from);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) {
|
||||
return pset1<Packet1cd>(*from);
|
||||
}
|
||||
|
||||
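// FIXME(review): the original note read "force unaligned store, this is a temporary fix",
// but the function below performs an aligned store via pstore -- confirm which is intended.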
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const Packet1cd& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double>* addr) {
|
||||
__builtin_prefetch(addr);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) {
|
||||
EIGEN_ALIGN16 double res[2];
|
||||
__lsx_vst(a.v, res, 0);
|
||||
return std::complex<double>(res[0], res[1]);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) {
|
||||
return pfirst(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) {
|
||||
return pfirst(a);
|
||||
}
|
||||
|
||||
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd, Packet2d)
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
return pdiv_complex(a, b);
|
||||
}
|
||||
|
||||
// Exchanges the two double lanes of the packet by reversing the underlying Packet2d.
EIGEN_STRONG_INLINE Packet1cd pcplxflip /* <Packet1cd> */ (const Packet1cd& x) {
  const Packet2d real_view(x.v);
  return Packet1cd(preverse(real_view));
}
|
||||
|
||||
// Transposes a 2x2 block of std::complex<float>.
//
// Each complex value occupies one 64-bit lane, so the transpose is an
// interleave of the 64-bit lanes of the two rows:
//   packet[0] <- { row0.lo, row1.lo }
//   packet[1] <- { row0.hi, row1.hi }
// This yields the same result as interleaving 32-bit words and then
// re-shuffling them with control 0xd8, using two instructions instead of four,
// and mirrors the Packet1cd ptranspose in this file.
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel) {
  const __m128i row0 = (__m128i)kernel.packet[0].v;
  const __m128i row1 = (__m128i)kernel.packet[1].v;
  kernel.packet[0].v = (Packet4f)__lsx_vilvl_d(row1, row0);
  kernel.packet[1].v = (Packet4f)__lsx_vilvh_d(row1, row0);
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b) {
|
||||
Packet4f eq = (Packet4f)__lsx_vfcmp_ceq_s(a.v, b.v);
|
||||
return Packet2cf(pand<Packet4f>(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) {
|
||||
Packet2d eq = (Packet2d)__lsx_vfcmp_ceq_d(a.v, b.v);
|
||||
return Packet1cd(pand<Packet2d>(eq, preverse(eq)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline Packet2cf pselect(const Packet2cf& mask, const Packet2cf& a, const Packet2cf& b) {
|
||||
Packet2cf res;
|
||||
res.v = (Packet4f)__lsx_vbitsel_v((__m128i)b.v, (__m128i)a.v, (__m128i)mask.v);
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
|
||||
return psqrt_complex<Packet1cd>(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
|
||||
return psqrt_complex<Packet2cf>(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a) {
|
||||
return plog_complex(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pzero<Packet1cd>(const Packet1cd& /* a */) {
|
||||
__m128d v = {0.0, 0.0};
|
||||
return (Packet1cd)v;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmadd<Packet1cd>(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) {
|
||||
Packet1cd result, t0, t1, t2;
|
||||
t1 = pzero(t1);
|
||||
t0.v = (__m128d)__lsx_vpackev_d((__m128i)a.v, (__m128i)a.v);
|
||||
t2.v = __lsx_vfmadd_d(t0.v, b.v, c.v);
|
||||
result.v = __lsx_vfadd_d(t2.v, t1.v);
|
||||
t1.v = __lsx_vfsub_d(t1.v, a.v);
|
||||
t1.v = (__m128d)__lsx_vpackod_d((__m128i)a.v, (__m128i)t1.v);
|
||||
t2.v = (__m128d)__lsx_vshuf4i_d((__m128i)t2.v, (__m128i)b.v, 0xb);
|
||||
result.v = __lsx_vfmadd_d(t1.v, t2.v, result.v);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from,
|
||||
Index /* stride */) {
|
||||
Packet1cd res;
|
||||
__m128i tmp = __lsx_vld((void*)from, 0);
|
||||
res.v = (__m128d)tmp;
|
||||
return res;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from,
|
||||
Index /* stride */) {
|
||||
__lsx_vst((__m128i)from.v, (void*)to, 0);
|
||||
}
|
||||
|
||||
// Interleaves the 64-bit lanes of the two packets: packet[0] receives the low
// lanes of both inputs, packet[1] the high lanes.
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd, 2>& kernel) {
  const __m128i row0 = (__m128i)kernel.packet[0].v;
  const __m128i row1 = (__m128i)kernel.packet[1].v;
  const Packet2d low_lanes = (__m128d)__lsx_vilvl_d(row1, row0);
  const Packet2d high_lanes = (__m128d)__lsx_vilvh_d(row1, row0);
  kernel.packet[1].v = high_lanes;
  kernel.packet[0].v = low_lanes;
}
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_COMPLEX_LSX_H
|
23
Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h
Normal file
23
Eigen/src/Core/arch/LSX/GeneralBlockPanelKernel.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef EIGEN_LSX_GENERAL_BLOCK_PANEL_KERNEL_H
#define EIGEN_LSX_GENERAL_BLOCK_PANEL_KERNEL_H

// LSX-specific tuning of the general block-panel (GEBP) kernel traits.
// The include guard above prevents the explicit specializations below from
// being defined twice if this header is included more than once.

// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {
namespace internal {

// Value used for `nr` in the specializations below. It can be overridden by
// defining EIGEN_LSX_GEBP_NR before this header is included.
#ifndef EIGEN_LSX_GEBP_NR
#define EIGEN_LSX_GEBP_NR 8
#endif

// float x float: inherit everything from the generic traits, override only `nr`.
template <>
struct gebp_traits<float, float, false, false, Architecture::LSX, GEBPPacketFull>
    : gebp_traits<float, float, false, false, Architecture::Generic, GEBPPacketFull> {
  enum { nr = EIGEN_LSX_GEBP_NR };
};

// double x double: inherit everything from the generic traits, override only `nr`.
template <>
struct gebp_traits<double, double, false, false, Architecture::LSX, GEBPPacketFull>
    : gebp_traits<double, double, false, false, Architecture::Generic, GEBPPacketFull> {
  enum { nr = EIGEN_LSX_GEBP_NR };
};

}  // namespace internal
}  // namespace Eigen

#endif  // EIGEN_LSX_GENERAL_BLOCK_PANEL_KERNEL_H
|
43
Eigen/src/Core/arch/LSX/MathFunctions.h
Normal file
43
Eigen/src/Core/arch/LSX/MathFunctions.h
Normal file
@ -0,0 +1,43 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2024 XiWei Gu (guxiwei-hf@loongson.cn)
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_MATH_FUNCTIONS_LSX_H
|
||||
#define EIGEN_MATH_FUNCTIONS_LSX_H
|
||||
|
||||
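/* NOTE(review): the attribution below mentions sin and cos, but this file
 * defines no sin/cos implementation of its own -- confirm whether the note
 * is still needed here.
 *
 * The sin and cos functions of this file are loosely derived from
 * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
 */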
|
||||
|
||||
// IWYU pragma: private
|
||||
#include "../../InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(atanh, Packet2d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet2d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet2d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(tanh, Packet2d)
|
||||
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(atanh, Packet4f)
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log, Packet4f)
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log2, Packet4f)
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(tanh, Packet4f)
|
||||
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet2d)
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4f)
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet2d)
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4f)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_LSX_H
|
2866
Eigen/src/Core/arch/LSX/PacketMath.h
Normal file
2866
Eigen/src/Core/arch/LSX/PacketMath.h
Normal file
File diff suppressed because it is too large
Load Diff
526
Eigen/src/Core/arch/LSX/TypeCasting.h
Normal file
526
Eigen/src/Core/arch/LSX/TypeCasting.h
Normal file
@ -0,0 +1,526 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2023 Zang Ruochen <zangruochen@loongson.cn>
|
||||
// Copyright (C) 2024 XiWei Gu <guxiwei-hf@loongson.cn>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#ifndef EIGEN_TYPE_CASTING_LSX_H
|
||||
#define EIGEN_TYPE_CASTING_LSX_H
|
||||
|
||||
// IWYU pragma: private
|
||||
#include "../../InternalHeaderCheck.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
//==============================================================================
|
||||
// preinterpret
|
||||
//==============================================================================
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) {
|
||||
return (__m128)((__m128i)a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4ui>(const Packet4ui& a) {
|
||||
return (__m128)((__m128i)a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
|
||||
return (__m128d)((__m128i)a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2ul>(const Packet2ul& a) {
|
||||
return (__m128d)((__m128i)a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) {
|
||||
return (__m128d)((__m128i)a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc, Packet16c>(const Packet16c& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us, Packet8s>(const Packet8s& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4f>(const Packet4f& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2d>(const Packet2d& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2l>(const Packet2l& a) {
|
||||
return (__m128i)a;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet4f, Packet2l>(const Packet4f& a) {
|
||||
Packet2d tmp = __lsx_vfcvtl_d_s(a);
|
||||
return __lsx_vftint_l_d(tmp);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pcast<Packet4f, Packet2ul>(const Packet4f& a) {
|
||||
Packet2d tmp = __lsx_vfcvtl_d_s(a);
|
||||
return __lsx_vftint_lu_d(tmp);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
|
||||
return __lsx_vftint_w_s(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pcast<Packet4f, Packet4ui>(const Packet4f& a) {
|
||||
return __lsx_vftint_wu_s(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pcast<Packet4f, Packet8s>(const Packet4f& a, const Packet4f& b) {
|
||||
return __lsx_vssrlni_h_w(__lsx_vftint_w_s(a), __lsx_vftint_w_s(b), 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pcast<Packet4f, Packet8us>(const Packet4f& a, const Packet4f& b) {
|
||||
return __lsx_vssrlni_hu_w(__lsx_vftint_wu_s(a), __lsx_vftint_wu_s(b), 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pcast<Packet4f, Packet16c>(const Packet4f& a, const Packet4f& b, const Packet4f& c,
|
||||
const Packet4f& d) {
|
||||
Packet8s tmp1 = __lsx_vssrlni_h_w(__lsx_vftint_w_s(a), __lsx_vftint_w_s(b), 0);
|
||||
Packet8s tmp2 = __lsx_vssrlni_h_w(__lsx_vftint_w_s(c), __lsx_vftint_w_s(d), 0);
|
||||
return __lsx_vssrlni_b_h((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pcast<Packet4f, Packet16uc>(const Packet4f& a, const Packet4f& b, const Packet4f& c,
|
||||
const Packet4f& d) {
|
||||
Packet8us tmp1 = __lsx_vssrlni_hu_w(__lsx_vftint_wu_s(a), __lsx_vftint_wu_s(b), 0);
|
||||
Packet8us tmp2 = __lsx_vssrlni_hu_w(__lsx_vftint_wu_s(c), __lsx_vftint_wu_s(d), 0);
|
||||
return __lsx_vssrlni_bu_h((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet16c, Packet4f>(const Packet16c& a) {
|
||||
Packet8s tmp1 = __lsx_vsllwil_h_b((__m128i)a, 0);
|
||||
Packet4i tmp2 = __lsx_vsllwil_w_h((__m128i)tmp1, 0);
|
||||
return __lsx_vffint_s_w(tmp2);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet16c, Packet2l>(const Packet16c& a) {
|
||||
Packet8s tmp1 = __lsx_vsllwil_h_b((__m128i)a, 0);
|
||||
Packet4i tmp2 = __lsx_vsllwil_w_h((__m128i)tmp1, 0);
|
||||
return __lsx_vsllwil_d_w((__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pcast<Packet16c, Packet2ul>(const Packet16c& a) {
|
||||
Packet8s tmp1 = __lsx_vsllwil_h_b((__m128i)a, 0);
|
||||
Packet4i tmp2 = __lsx_vsllwil_w_h((__m128i)tmp1, 0);
|
||||
return (Packet2ul)__lsx_vsllwil_d_w((__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pcast<Packet16c, Packet4i>(const Packet16c& a) {
|
||||
Packet8s tmp1 = __lsx_vsllwil_h_b((__m128i)a, 0);
|
||||
return __lsx_vsllwil_w_h((__m128i)tmp1, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pcast<Packet16c, Packet4ui>(const Packet16c& a) {
|
||||
Packet8s tmp1 = __lsx_vsllwil_h_b((__m128i)a, 0);
|
||||
return (Packet4ui)__lsx_vsllwil_w_h((__m128i)tmp1, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pcast<Packet16c, Packet8s>(const Packet16c& a) {
|
||||
return __lsx_vsllwil_h_b((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pcast<Packet16c, Packet8us>(const Packet16c& a) {
|
||||
return (Packet8us)__lsx_vsllwil_h_b((__m128i)a, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet16uc, Packet4f>(const Packet16uc& a) {
|
||||
Packet8us tmp1 = __lsx_vsllwil_hu_bu((__m128i)a, 0);
|
||||
Packet4ui tmp2 = __lsx_vsllwil_wu_hu((__m128i)tmp1, 0);
|
||||
return __lsx_vffint_s_wu(tmp2);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pcast<Packet16uc, Packet2ul>(const Packet16uc& a) {
|
||||
Packet8us tmp1 = __lsx_vsllwil_hu_bu((__m128i)a, 0);
|
||||
Packet4ui tmp2 = __lsx_vsllwil_wu_hu((__m128i)tmp1, 0);
|
||||
return __lsx_vsllwil_du_wu((__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet16uc, Packet2l>(const Packet16uc& a) {
|
||||
Packet8us tmp1 = __lsx_vsllwil_hu_bu((__m128i)a, 0);
|
||||
Packet4ui tmp2 = __lsx_vsllwil_wu_hu((__m128i)tmp1, 0);
|
||||
return (Packet2l)__lsx_vsllwil_du_wu((__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pcast<Packet16uc, Packet4ui>(const Packet16uc& a) {
|
||||
Packet8us tmp1 = __lsx_vsllwil_hu_bu((__m128i)a, 0);
|
||||
return __lsx_vsllwil_wu_hu((__m128i)tmp1, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pcast<Packet16uc, Packet4i>(const Packet16uc& a) {
|
||||
Packet8us tmp1 = __lsx_vsllwil_hu_bu((__m128i)a, 0);
|
||||
return (Packet4i)__lsx_vsllwil_wu_hu((__m128i)tmp1, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pcast<Packet16uc, Packet8us>(const Packet16uc& a) {
|
||||
return __lsx_vsllwil_hu_bu((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pcast<Packet16uc, Packet8s>(const Packet16uc& a) {
|
||||
return (Packet8s)__lsx_vsllwil_hu_bu((__m128i)a, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet8s, Packet4f>(const Packet8s& a) {
|
||||
Packet4i tmp1 = __lsx_vsllwil_w_h((__m128i)a, 0);
|
||||
return __lsx_vffint_s_w(tmp1);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet8s, Packet2l>(const Packet8s& a) {
|
||||
Packet4i tmp1 = __lsx_vsllwil_w_h((__m128i)a, 0);
|
||||
return __lsx_vsllwil_d_w((__m128i)tmp1, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pcast<Packet8s, Packet2ul>(const Packet8s& a) {
|
||||
Packet4i tmp1 = __lsx_vsllwil_w_h((__m128i)a, 0);
|
||||
return (Packet2ul)__lsx_vsllwil_d_w((__m128i)tmp1, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pcast<Packet8s, Packet4i>(const Packet8s& a) {
|
||||
return __lsx_vsllwil_w_h((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pcast<Packet8s, Packet4ui>(const Packet8s& a) {
|
||||
return (Packet4ui)__lsx_vsllwil_w_h((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pcast<Packet8s, Packet16c>(const Packet8s& a, const Packet8s& b) {
|
||||
return __lsx_vssrlni_b_h((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pcast<Packet8s, Packet16uc>(const Packet8s& a, const Packet8s& b) {
|
||||
return (Packet16uc)__lsx_vssrlni_b_h((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet8us, Packet4f>(const Packet8us& a) {
|
||||
Packet4ui tmp1 = __lsx_vsllwil_wu_hu((__m128i)a, 0);
|
||||
return __lsx_vffint_s_wu(tmp1);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pcast<Packet8us, Packet2ul>(const Packet8us& a) {
|
||||
Packet4ui tmp1 = __lsx_vsllwil_wu_hu((__m128i)a, 0);
|
||||
return __lsx_vsllwil_du_wu((__m128i)tmp1, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet8us, Packet2l>(const Packet8us& a) {
|
||||
Packet4ui tmp1 = __lsx_vsllwil_wu_hu((__m128i)a, 0);
|
||||
return (Packet2l)__lsx_vsllwil_du_wu((__m128i)tmp1, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pcast<Packet8us, Packet4ui>(const Packet8us& a) {
|
||||
return __lsx_vsllwil_wu_hu((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pcast<Packet8us, Packet4i>(const Packet8us& a) {
|
||||
return (Packet4i)__lsx_vsllwil_wu_hu((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pcast<Packet8us, Packet16uc>(const Packet8us& a, const Packet8us& b) {
|
||||
return __lsx_vssrlni_bu_h((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pcast<Packet8us, Packet16c>(const Packet8us& a, const Packet8us& b) {
|
||||
return (Packet16c)__lsx_vssrlni_bu_h((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
|
||||
return __lsx_vffint_s_w(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet4i, Packet2l>(const Packet4i& a) {
|
||||
return __lsx_vsllwil_d_w((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pcast<Packet4i, Packet2ul>(const Packet4i& a) {
|
||||
return (Packet2ul)__lsx_vsllwil_d_w((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pcast<Packet4i, Packet8s>(const Packet4i& a, const Packet4i& b) {
|
||||
return __lsx_vssrlni_h_w((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pcast<Packet4i, Packet8us>(const Packet4i& a, const Packet4i& b) {
|
||||
return (Packet8us)__lsx_vssrlni_h_w((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pcast<Packet4i, Packet16c>(const Packet4i& a, const Packet4i& b, const Packet4i& c,
|
||||
const Packet4i& d) {
|
||||
Packet8s tmp1 = __lsx_vssrlni_h_w((__m128i)a, (__m128i)b, 0);
|
||||
Packet8s tmp2 = __lsx_vssrlni_h_w((__m128i)c, (__m128i)d, 0);
|
||||
return __lsx_vssrlni_b_h((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pcast<Packet4i, Packet16uc>(const Packet4i& a, const Packet4i& b, const Packet4i& c,
|
||||
const Packet4i& d) {
|
||||
Packet8s tmp1 = __lsx_vssrlni_h_w((__m128i)a, (__m128i)b, 0);
|
||||
Packet8s tmp2 = __lsx_vssrlni_h_w((__m128i)c, (__m128i)d, 0);
|
||||
return (Packet16uc)__lsx_vssrlni_b_h((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet4ui, Packet4f>(const Packet4ui& a) {
|
||||
return __lsx_vffint_s_wu(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pcast<Packet4ui, Packet2ul>(const Packet4ui& a) {
|
||||
return __lsx_vsllwil_du_wu((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet4ui, Packet2l>(const Packet4ui& a) {
|
||||
return (Packet2l)__lsx_vsllwil_du_wu((__m128i)a, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pcast<Packet4ui, Packet8us>(const Packet4ui& a, const Packet4ui& b) {
|
||||
return __lsx_vssrlni_hu_w((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pcast<Packet4ui, Packet8s>(const Packet4ui& a, const Packet4ui& b) {
|
||||
return (Packet8s)__lsx_vssrlni_hu_w((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pcast<Packet4ui, Packet16uc>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c,
|
||||
const Packet4ui& d) {
|
||||
Packet8us tmp1 = __lsx_vssrlni_hu_w((__m128i)a, (__m128i)b, 0);
|
||||
Packet8us tmp2 = __lsx_vssrlni_hu_w((__m128i)c, (__m128i)d, 0);
|
||||
return __lsx_vssrlni_bu_h((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pcast<Packet4ui, Packet16c>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c,
|
||||
const Packet4ui& d) {
|
||||
Packet8us tmp1 = __lsx_vssrlni_hu_w((__m128i)a, (__m128i)b, 0);
|
||||
Packet8us tmp2 = __lsx_vssrlni_hu_w((__m128i)c, (__m128i)d, 0);
|
||||
return (Packet16c)__lsx_vssrlni_bu_h((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet2l, Packet4f>(const Packet2l& a, const Packet2l& b) {
|
||||
return __lsx_vffint_s_w(__lsx_vssrlni_w_d((__m128i)a, (__m128i)b, 0));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pcast<Packet2l, Packet4i>(const Packet2l& a, const Packet2l& b) {
|
||||
return __lsx_vssrlni_w_d((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pcast<Packet2l, Packet4ui>(const Packet2l& a, const Packet2l& b) {
|
||||
return (Packet4ui)__lsx_vssrlni_w_d((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pcast<Packet2l, Packet8s>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
|
||||
const Packet2l& d) {
|
||||
Packet4i tmp1 = __lsx_vssrlni_w_d((__m128i)a, (__m128i)b, 0);
|
||||
Packet4i tmp2 = __lsx_vssrlni_w_d((__m128i)c, (__m128i)d, 0);
|
||||
return __lsx_vssrlni_h_w((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pcast<Packet2l, Packet8us>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
|
||||
const Packet2l& d) {
|
||||
Packet4i tmp1 = __lsx_vssrlni_w_d((__m128i)a, (__m128i)b, 0);
|
||||
Packet4i tmp2 = __lsx_vssrlni_w_d((__m128i)c, (__m128i)d, 0);
|
||||
return (Packet8us)__lsx_vssrlni_h_w((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pcast<Packet2l, Packet16c>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
|
||||
const Packet2l& d, const Packet2l& e, const Packet2l& f,
|
||||
const Packet2l& g, const Packet2l& h) {
|
||||
const Packet8s abcd = pcast<Packet2l, Packet8s>(a, b, c, d);
|
||||
const Packet8s efgh = pcast<Packet2l, Packet8s>(e, f, g, h);
|
||||
return __lsx_vssrlni_b_h((__m128i)abcd, (__m128i)efgh, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pcast<Packet2l, Packet16uc>(const Packet2l& a, const Packet2l& b, const Packet2l& c,
|
||||
const Packet2l& d, const Packet2l& e, const Packet2l& f,
|
||||
const Packet2l& g, const Packet2l& h) {
|
||||
const Packet8us abcd = pcast<Packet2l, Packet8us>(a, b, c, d);
|
||||
const Packet8us efgh = pcast<Packet2l, Packet8us>(e, f, g, h);
|
||||
return __lsx_vssrlni_bu_h((__m128i)abcd, (__m128i)efgh, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet2ul, Packet4f>(const Packet2ul& a, const Packet2ul& b) {
|
||||
return __lsx_vffint_s_wu(__lsx_vssrlni_w_d((__m128i)a, (__m128i)b, 0));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pcast<Packet2ul, Packet4ui>(const Packet2ul& a, const Packet2ul& b) {
|
||||
return __lsx_vssrlni_wu_d((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pcast<Packet2ul, Packet4i>(const Packet2ul& a, const Packet2ul& b) {
|
||||
return (Packet4i)__lsx_vssrlni_wu_d((__m128i)a, (__m128i)b, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pcast<Packet2ul, Packet8us>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
|
||||
const Packet2ul& d) {
|
||||
Packet4ui tmp1 = __lsx_vssrlni_wu_d((__m128i)a, (__m128i)b, 0);
|
||||
Packet4ui tmp2 = __lsx_vssrlni_wu_d((__m128i)c, (__m128i)d, 0);
|
||||
return __lsx_vssrlni_hu_w((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pcast<Packet2ul, Packet8s>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
|
||||
const Packet2ul& d) {
|
||||
Packet4ui tmp1 = __lsx_vssrlni_wu_d((__m128i)a, (__m128i)b, 0);
|
||||
Packet4ui tmp2 = __lsx_vssrlni_wu_d((__m128i)c, (__m128i)d, 0);
|
||||
return (Packet8s)__lsx_vssrlni_hu_w((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pcast<Packet2ul, Packet16uc>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
|
||||
const Packet2ul& d, const Packet2ul& e, const Packet2ul& f,
|
||||
const Packet2ul& g, const Packet2ul& h) {
|
||||
const Packet8s abcd = pcast<Packet2ul, Packet8s>(a, b, c, d);
|
||||
const Packet8s efgh = pcast<Packet2ul, Packet8s>(e, f, g, h);
|
||||
return __lsx_vssrlni_b_h((__m128i)abcd, (__m128i)efgh, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pcast<Packet2ul, Packet16c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
|
||||
const Packet2ul& d, const Packet2ul& e, const Packet2ul& f,
|
||||
const Packet2ul& g, const Packet2ul& h) {
|
||||
const Packet8us abcd = pcast<Packet2ul, Packet8us>(a, b, c, d);
|
||||
const Packet8us efgh = pcast<Packet2ul, Packet8us>(e, f, g, h);
|
||||
return __lsx_vssrlni_bu_h((__m128i)abcd, (__m128i)efgh, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
|
||||
return __lsx_vfcvt_s_d(b, a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
|
||||
return __lsx_vftint_l_d(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2ul pcast<Packet2d, Packet2ul>(const Packet2d& a) {
|
||||
return __lsx_vftint_lu_d(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
|
||||
return __lsx_vssrlni_w_d(__lsx_vftint_l_d(a), __lsx_vftint_l_d(b), 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4ui pcast<Packet2d, Packet4ui>(const Packet2d& a, const Packet2d& b) {
|
||||
return __lsx_vssrlni_wu_d(__lsx_vftint_lu_d(a), __lsx_vftint_lu_d(b), 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8s pcast<Packet2d, Packet8s>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
|
||||
const Packet2d& d) {
|
||||
Packet4i tmp1 = __lsx_vssrlni_w_d(__lsx_vftint_l_d(a), __lsx_vftint_l_d(b), 0);
|
||||
Packet4i tmp2 = __lsx_vssrlni_w_d(__lsx_vftint_l_d(c), __lsx_vftint_l_d(d), 0);
|
||||
return __lsx_vssrlni_h_w((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8us pcast<Packet2d, Packet8us>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
|
||||
const Packet2d& d) {
|
||||
Packet4ui tmp1 = __lsx_vssrlni_wu_d(__lsx_vftint_lu_d(a), __lsx_vftint_lu_d(b), 0);
|
||||
Packet4ui tmp2 = __lsx_vssrlni_wu_d(__lsx_vftint_lu_d(c), __lsx_vftint_lu_d(d), 0);
|
||||
return __lsx_vssrlni_hu_w((__m128i)tmp1, (__m128i)tmp2, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c pcast<Packet2d, Packet16c>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
|
||||
const Packet2d& d, const Packet2d& e, const Packet2d& f,
|
||||
const Packet2d& g, const Packet2d& h) {
|
||||
const Packet8s abcd = pcast<Packet2d, Packet8s>(a, b, c, d);
|
||||
const Packet8s efgh = pcast<Packet2d, Packet8s>(e, f, g, h);
|
||||
return __lsx_vssrlni_b_h((__m128i)abcd, (__m128i)efgh, 0);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16uc pcast<Packet2d, Packet16uc>(const Packet2d& a, const Packet2d& b, const Packet2d& c,
|
||||
const Packet2d& d, const Packet2d& e, const Packet2d& f,
|
||||
const Packet2d& g, const Packet2d& h) {
|
||||
const Packet8us abcd = pcast<Packet2d, Packet8us>(a, b, c, d);
|
||||
const Packet8us efgh = pcast<Packet2d, Packet8us>(e, f, g, h);
|
||||
return __lsx_vssrlni_bu_h((__m128i)abcd, (__m128i)efgh, 0);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
|
||||
return __lsx_vfcvtl_d_s(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet16c, Packet2d>(const Packet16c& a) {
|
||||
Packet8s tmp1 = __lsx_vsllwil_h_b((__m128i)a, 0);
|
||||
Packet4i tmp2 = __lsx_vsllwil_w_h((__m128i)tmp1, 0);
|
||||
return __lsx_vffint_d_l(__lsx_vsllwil_d_w((__m128i)tmp2, 0));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet16uc, Packet2d>(const Packet16uc& a) {
|
||||
Packet8us tmp1 = __lsx_vsllwil_hu_bu((__m128i)a, 0);
|
||||
Packet4ui tmp2 = __lsx_vsllwil_wu_hu((__m128i)tmp1, 0);
|
||||
return __lsx_vffint_d_lu(__lsx_vsllwil_du_wu((__m128i)tmp2, 0));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet8s, Packet2d>(const Packet8s& a) {
|
||||
Packet4i tmp = __lsx_vsllwil_w_h((__m128i)a, 0);
|
||||
return __lsx_vffint_d_l(__lsx_vsllwil_d_w((__m128i)tmp, 0));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) {
|
||||
Packet4ui tmp = __lsx_vsllwil_wu_hu((__m128i)a, 0);
|
||||
return __lsx_vffint_d_lu(__lsx_vsllwil_du_wu((__m128i)tmp, 0));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet4i, Packet2d>(const Packet4i& a) {
|
||||
return __lsx_vffint_d_l(__lsx_vsllwil_d_w((__m128i)a, 0));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet4ui, Packet2d>(const Packet4ui& a) {
|
||||
return __lsx_vffint_d_lu(__lsx_vsllwil_du_wu((__m128i)a, 0));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) {
|
||||
return __lsx_vffint_d_l(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet2ul, Packet2d>(const Packet2ul& a) {
|
||||
return __lsx_vffint_d_lu(a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_TYPE_CASTING_LSX_H
|
@ -1117,7 +1117,7 @@ struct lhs_process_one_packet {
|
||||
// loops on each largest micro horizontal panel of lhs
|
||||
// (LhsProgress x depth)
|
||||
for (Index i = peelStart; i < peelEnd; i += LhsProgress) {
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#if EIGEN_ARCH_ARM64 || EIGEN_ARCH_LOONGARCH64
|
||||
EIGEN_IF_CONSTEXPR(nr >= 8) {
|
||||
for (Index j2 = 0; j2 < packet_cols8; j2 += 8) {
|
||||
const LhsScalar* blA = &blockA[i * strideA + offsetA * (LhsProgress)];
|
||||
@ -1467,7 +1467,7 @@ EIGEN_DONT_INLINE void gebp_kernel<LhsScalar, RhsScalar, Index, DataMapper, mr,
|
||||
(depth * sizeof(LhsScalar) * 3 * LhsProgress)));
|
||||
for (Index i1 = 0; i1 < peeled_mc3; i1 += actual_panel_rows) {
|
||||
const Index actual_panel_end = (std::min)(i1 + actual_panel_rows, peeled_mc3);
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#if EIGEN_ARCH_ARM64 || EIGEN_ARCH_LOONGARCH64
|
||||
EIGEN_IF_CONSTEXPR(nr >= 8) {
|
||||
for (Index j2 = 0; j2 < packet_cols8; j2 += 8) {
|
||||
for (Index i = i1; i < actual_panel_end; i += 3 * LhsProgress) {
|
||||
@ -1935,7 +1935,7 @@ EIGEN_DONT_INLINE void gebp_kernel<LhsScalar, RhsScalar, Index, DataMapper, mr,
|
||||
|
||||
for (Index i1 = peeled_mc3; i1 < peeled_mc2; i1 += actual_panel_rows) {
|
||||
Index actual_panel_end = (std::min)(i1 + actual_panel_rows, peeled_mc2);
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#if EIGEN_ARCH_ARM64 || EIGEN_ARCH_LOONGARCH64
|
||||
EIGEN_IF_CONSTEXPR(nr >= 8) {
|
||||
for (Index j2 = 0; j2 < packet_cols8; j2 += 8) {
|
||||
for (Index i = i1; i < actual_panel_end; i += 2 * LhsProgress) {
|
||||
@ -2326,7 +2326,7 @@ EIGEN_DONT_INLINE void gebp_kernel<LhsScalar, RhsScalar, Index, DataMapper, mr,
|
||||
}
|
||||
//---------- Process remaining rows, 1 at once ----------
|
||||
if (peeled_mc_quarter < rows) {
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#if EIGEN_ARCH_ARM64 || EIGEN_ARCH_LOONGARCH64
|
||||
EIGEN_IF_CONSTEXPR(nr >= 8) {
|
||||
// loop on each panel of the rhs
|
||||
for (Index j2 = 0; j2 < packet_cols8; j2 += 8) {
|
||||
@ -2852,7 +2852,7 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Co
|
||||
Index count = 0;
|
||||
const Index peeled_k = (depth / PacketSize) * PacketSize;
|
||||
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#if EIGEN_ARCH_ARM64 || EIGEN_ARCH_LOONGARCH64
|
||||
EIGEN_IF_CONSTEXPR(nr >= 8) {
|
||||
for (Index j2 = 0; j2 < packet_cols8; j2 += 8) {
|
||||
// skip what we have before
|
||||
@ -3035,7 +3035,7 @@ struct gemm_pack_rhs<Scalar, Index, DataMapper, nr, RowMajor, Conjugate, PanelMo
|
||||
Index packet_cols4 = nr >= 4 ? (cols / 4) * 4 : 0;
|
||||
Index count = 0;
|
||||
|
||||
#if EIGEN_ARCH_ARM64
|
||||
#if EIGEN_ARCH_ARM64 || EIGEN_ARCH_LOONGARCH64
|
||||
EIGEN_IF_CONSTEXPR(nr >= 8) {
|
||||
for (Index j2 = 0; j2 < packet_cols8; j2 += 8) {
|
||||
// skip what we have before
|
||||
|
@ -100,8 +100,8 @@
|
||||
// certain common platform (compiler+architecture combinations) to avoid these problems.
|
||||
// Only static alignment is really problematic (relies on nonstandard compiler extensions),
|
||||
// try to keep heap alignment even when we have to disable static alignment.
|
||||
#if EIGEN_COMP_GNUC && \
|
||||
!(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
|
||||
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || \
|
||||
EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64)
|
||||
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
|
||||
#else
|
||||
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
|
||||
@ -430,6 +430,12 @@ extern "C" {
|
||||
#include <msa.h>
|
||||
#endif
|
||||
|
||||
#elif (defined __loongarch64 && defined __loongarch_sx)
|
||||
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_LSX
|
||||
#include <lsxintrin.h>
|
||||
|
||||
#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
|
||||
|
||||
#define EIGEN_VECTORIZE
|
||||
@ -520,6 +526,8 @@ inline static const char *SimdInstructionSetsInUse(void) {
|
||||
return "S390X ZVECTOR";
|
||||
#elif defined(EIGEN_VECTORIZE_MSA)
|
||||
return "MIPS MSA";
|
||||
#elif defined(EIGEN_VECTORIZE_LSX)
|
||||
return "LOONGARCH64 LSX";
|
||||
#else
|
||||
return "None";
|
||||
#endif
|
||||
|
@ -474,6 +474,7 @@ enum Type {
|
||||
MSA = 0x5,
|
||||
SVE = 0x6,
|
||||
HVX = 0x7,
|
||||
LSX = 0x8,
|
||||
#if defined EIGEN_VECTORIZE_SSE
|
||||
Target = SSE
|
||||
#elif defined EIGEN_VECTORIZE_ALTIVEC
|
||||
@ -488,6 +489,8 @@ enum Type {
|
||||
Target = MSA
|
||||
#elif defined EIGEN_VECTORIZE_HVX
|
||||
Target = HVX
|
||||
#elif defined EIGEN_VECTORIZE_LSX
|
||||
Target = LSX
|
||||
#else
|
||||
Target = Generic
|
||||
#endif
|
||||
|
@ -376,6 +376,13 @@
|
||||
#define EIGEN_ARCH_MIPS 0
|
||||
#endif
|
||||
|
||||
/// \internal EIGEN_ARCH_LOONGARCH64 set to 1 if the architecture is LOONGARCH64
|
||||
#if defined(__loongarch64)
|
||||
#define EIGEN_ARCH_LOONGARCH64 1
|
||||
#else
|
||||
#define EIGEN_ARCH_LOONGARCH64 0
|
||||
#endif
|
||||
|
||||
/// \internal EIGEN_ARCH_SPARC set to 1 if the architecture is SPARC
|
||||
#if defined(__sparc__) || defined(__sparc)
|
||||
#define EIGEN_ARCH_SPARC 1
|
||||
|
@ -296,6 +296,30 @@ build:linux:cross:ppc64le:clang-12:default:
|
||||
EIGEN_CI_CXX_COMPILER: clang++-12
|
||||
EIGEN_CI_CROSS_INSTALL: g++-10-powerpc64le-linux-gnu clang-12
|
||||
|
||||
######## loongarch64 #################################################
|
||||
|
||||
.build:linux:cross:loongarch64:
|
||||
extends: .build:linux:cross
|
||||
variables:
|
||||
EIGEN_CI_TARGET_ARCH: loongarch64
|
||||
EIGEN_CI_CROSS_TARGET_TRIPLE: loongarch64-linux-gnu
|
||||
tags:
|
||||
- eigen-runner
|
||||
- linux
|
||||
- x86-64
|
||||
|
||||
# GCC-14 (minimum on Ubuntu 24)
|
||||
build:linux:cross:loongarch64:gcc-14:default:
|
||||
extends: .build:linux:cross:loongarch64
|
||||
image: ubuntu:24.04
|
||||
variables:
|
||||
EIGEN_CI_C_COMPILER: gcc-14
|
||||
EIGEN_CI_CXX_COMPILER: g++-14
|
||||
EIGEN_CI_CROSS_INSTALL: g++-14-loongarch64-linux-gnu gcc-14-loongarch64-linux-gnu
|
||||
EIGEN_CI_CROSS_C_COMPILER: loongarch64-linux-gnu-gcc-14
|
||||
EIGEN_CI_CROSS_CXX_COMPILER: loongarch64-linux-gnu-g++-14
|
||||
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_LSX=on"
|
||||
|
||||
######## MR Smoke Tests ########################################################
|
||||
|
||||
build:linux:cross:x86-64:gcc-10:default:smoketest:
|
||||
|
@ -415,6 +415,37 @@ test:linux:ppc64le:clang-12:default:unsupported:
|
||||
variables:
|
||||
EIGEN_CI_TEST_LABEL: Unsupported
|
||||
|
||||
##### loongarch64 ###################################################################
|
||||
.test:linux:loongarch64:
|
||||
extends: .test:linux
|
||||
variables:
|
||||
EIGEN_CI_TARGET_ARCH: loongarch64
|
||||
EIGEN_CI_CROSS_TARGET_TRIPLE: loongarch64-linux-gnu
|
||||
# Install QEMU and set up the execution environment in the image
|
||||
EIGEN_CI_BEFORE_SCRIPT: "apt-get update && apt-get install g++-14-loongarch64-linux-gnu gcc-14-loongarch64-linux-gnu qemu-user-static -y && \
|
||||
ln -sf /usr/loongarch64-linux-gnu/lib64/ld-linux-loongarch-lp64d.so.1 /lib64/ld-linux-loongarch-lp64d.so.1 && \
|
||||
export LD_LIBRARY_PATH=/usr/loongarch64-linux-gnu/lib:$LD_LIBRARY_PATH"
|
||||
tags:
|
||||
- eigen-runner
|
||||
- linux
|
||||
- x86-64
|
||||
|
||||
# GCC-14 (Ubuntu 24)
|
||||
.test:linux:loongarch64:gcc-14:default:
|
||||
extends: .test:linux:loongarch64
|
||||
image: ubuntu:24.04
|
||||
needs: [ build:linux:cross:loongarch64:gcc-14:default ]
|
||||
|
||||
test:linux:loongarch64:gcc-14:default:official:
|
||||
extends: .test:linux:loongarch64:gcc-14:default
|
||||
variables:
|
||||
EIGEN_CI_TEST_LABEL: Official
|
||||
|
||||
test:linux:loongarch64:gcc-14:default:unsupported:
|
||||
extends: .test:linux:loongarch64:gcc-14:default
|
||||
variables:
|
||||
EIGEN_CI_TEST_LABEL: Unsupported
|
||||
|
||||
##### MR Smoke Tests ###########################################################
|
||||
|
||||
test:linux:x86-64:gcc-10:default:smoketest:
|
||||
|
@ -367,6 +367,12 @@ macro(ei_testing_print_summary)
|
||||
message(STATUS "S390X ZVECTOR: Using architecture defaults")
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_LSX)
|
||||
message(STATUS "LSX: ON")
|
||||
else()
|
||||
message(STATUS "LSX: Using architecture defaults")
|
||||
endif()
|
||||
|
||||
if(EIGEN_TEST_SYCL)
|
||||
if(EIGEN_SYCL_TRISYCL)
|
||||
message(STATUS "SYCL: ON (using triSYCL)")
|
||||
|
Loading…
x
Reference in New Issue
Block a user