mirror of https://gitlab.com/libeigen/eigen.git
synced 2025-08-09 02:09:01 +08:00

Initial RVV_Chip

parent 7294434099
commit dbdb94da81

Eigen/Core (12 changes)
@@ -249,6 +249,14 @@ using std::ptrdiff_t;
#include "src/Core/arch/SVE/PacketMath.h"
#include "src/Core/arch/SVE/TypeCasting.h"
#include "src/Core/arch/SVE/MathFunctions.h"
#elif defined EIGEN_VECTORIZE_RVV10
#include "src/Core/arch/RVV10/PacketMath.h"
#include "src/Core/arch/RVV10/TypeCasting.h"
#include "src/Core/arch/RVV10/MathFunctions.h"
#include "src/Core/arch/RVV10/Complex.h"
#if defined EIGEN_VECTORIZE_RVV10FP16
#include "src/Core/arch/RVV10/PacketMathFP16.h"
#endif
#elif defined EIGEN_VECTORIZE_ZVECTOR
#include "src/Core/arch/ZVector/PacketMath.h"
#include "src/Core/arch/ZVector/MathFunctions.h"
@@ -396,6 +404,10 @@ using std::ptrdiff_t;
#include "src/Core/arch/AVX512/GemmKernel.h"
#endif

#if defined(EIGEN_VECTORIZE_RVV10)
#include "src/Core/arch/RVV10/GeneralBlockPanelKernel.h"
#endif

#include "src/Core/Select.h"
#include "src/Core/VectorwiseOp.h"
#include "src/Core/PartialReduxEvaluator.h"
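The include changes above are gated so that non-RISC-V builds are unaffected: the RVV10 headers are only pulled in when EIGEN_VECTORIZE_RVV10 is defined, and the FP16 kernels additionally require EIGEN_VECTORIZE_RVV10FP16. A minimal, hedged sketch of how a user program could check which path was compiled in (illustrative only, not part of this commit):

// Hedged sketch: report which vector backend this translation unit was built with.
// The numerical result does not depend on the backend.
#include <Eigen/Dense>
#include <iostream>

int main() {
#if defined(EIGEN_VECTORIZE_RVV10FP16)
  std::cout << "RVV 1.0 kernels with FP16 support\n";
#elif defined(EIGEN_VECTORIZE_RVV10)
  std::cout << "RVV 1.0 kernels\n";
#else
  std::cout << "non-RVV (generic or other SIMD) kernels\n";
#endif
  Eigen::VectorXf v = Eigen::VectorXf::LinSpaced(8, 0.0f, 7.0f);
  std::cout << "sum = " << v.sum() << "\n";  // 28 on every backend
  return 0;
}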
@@ -64,8 +64,13 @@ struct copy_using_evaluator_traits {
static constexpr int OuterStride = outer_stride_at_compile_time<Dst>::ret;

// TODO distinguish between linear traversal and inner-traversals
#ifdef EIGEN_RISCV64_USE_RVV10
using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize, 4>::type;
using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize, 4>::type;
#else
using LinearPacketType = typename find_best_packet<DstScalar, RestrictedLinearSize>::type;
using InnerPacketType = typename find_best_packet<DstScalar, RestrictedInnerSize>::type;
#endif

static constexpr int LinearPacketSize = unpacket_traits<LinearPacketType>::size;
static constexpr int InnerPacketSize = unpacket_traits<InnerPacketType>::size;
@@ -1367,7 +1367,11 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>>
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;
// TODO: should check for smaller packet types once we can handle multi-sized packet types
#ifdef EIGEN_RISCV64_USE_RVV10
typedef typename packet_traits<Scalar, 4>::type PacketScalar;
#else
typedef typename packet_traits<Scalar>::type PacketScalar;
#endif

enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
@@ -105,7 +105,7 @@ struct default_packet_traits {
};
};

template <typename T>
template <typename T, int LMul>
struct packet_traits : default_packet_traits {
typedef T type;
typedef T half;
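The hunk above generalizes packet_traits with a trailing int LMul parameter; the EIGEN_RISCV64_USE_RVV10 call sites shown earlier pass 4, while non-RVV builds keep using the one-argument form, so the parameter is presumably defaulted elsewhere (e.g. in a forward declaration). A hedged, self-contained sketch of why that pattern stays source-compatible (the packet_traits_demo name is illustrative, not Eigen's):

// Illustrative only: a defaulted trailing template parameter keeps every
// existing one-argument use compiling, while new call sites can opt in.
template <typename T, int LMul = 1>
struct packet_traits_demo {
  static constexpr int lmul = LMul;
};

static_assert(packet_traits_demo<float>::lmul == 1, "legacy call sites unchanged");
static_assert(packet_traits_demo<float, 4>::lmul == 4, "RVV-aware call sites opt in");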
@@ -533,8 +533,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
};

#ifdef EIGEN_RISCV64_USE_RVV10
typedef typename find_best_packet<Scalar, RowsAtCompileTime, 4>::type LhsVecPacketType;
typedef typename find_best_packet<Scalar, ColsAtCompileTime, 4>::type RhsVecPacketType;
#else
typedef typename find_best_packet<Scalar, RowsAtCompileTime>::type LhsVecPacketType;
typedef typename find_best_packet<Scalar, ColsAtCompileTime>::type RhsVecPacketType;
#endif

enum {
@@ -29,7 +29,11 @@ namespace internal {
template <typename Func, typename Evaluator>
struct redux_traits {
public:
#ifdef EIGEN_RISCV64_USE_RVV10
typedef typename find_best_packet<typename Evaluator::Scalar, Evaluator::SizeAtCompileTime, 4>::type PacketType;
#else
typedef typename find_best_packet<typename Evaluator::Scalar, Evaluator::SizeAtCompileTime>::type PacketType;
#endif
enum {
PacketSize = unpacket_traits<PacketType>::size,
InnerMaxSize = int(Evaluator::IsRowMajor) ? Evaluator::MaxColsAtCompileTime : Evaluator::MaxRowsAtCompileTime,
Eigen/src/Core/arch/RVV10/Complex.h (new file, 713 lines)
@@ -0,0 +1,713 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2025 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_COMPLEX_RVV10_H
#define EIGEN_COMPLEX_RVV10_H

// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

/********************************* float32 ************************************/

struct PacketXcf {
EIGEN_STRONG_INLINE PacketXcf() {}
EIGEN_STRONG_INLINE explicit PacketXcf(const PacketXf& _real, const PacketXf& _imag) : real(_real), imag(_imag) {}
EIGEN_STRONG_INLINE explicit PacketXcf(const PacketMul2Xf& a)
: real(__riscv_vget_v_f32m2_f32m1(a, 0)), imag(__riscv_vget_v_f32m2_f32m1(a, 1)) {}
PacketXf real;
PacketXf imag;
};

template <int LMul>
struct packet_traits<std::complex<float>, LMul> : default_packet_traits {
typedef PacketXcf type;
typedef PacketXcf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 0,
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 1>::size,

HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasSqrt = 1,
HasSign = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasLog = 0,
HasSetLinear = 0
};
};

template <>
struct unpacket_traits<PacketXcf> {
typedef std::complex<float> type;
typedef PacketXcf half;
typedef PacketMul2Xf as_real;
enum {
size = rvv_packet_size_selector<float, EIGEN_RISCV64_RVV_VL, 1>::size,
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
vectorizable = true,
masked_load_available = false,
masked_store_available = false
};
};

template <>
EIGEN_STRONG_INLINE PacketXcf pcast<PacketMul2Xf, PacketXcf>(const PacketMul2Xf& a) {
return PacketXcf(a);
}

template <>
EIGEN_STRONG_INLINE PacketMul2Xf pcast<PacketXcf, PacketMul2Xf>(const PacketXcf& a) {
return __riscv_vcreate_v_f32m1_f32m2(a.real, a.imag);
}

template <>
EIGEN_STRONG_INLINE PacketXcf pset1<PacketXcf>(const std::complex<float>& from) {
PacketXf real = pset1<PacketXf>(from.real());
PacketXf imag = pset1<PacketXf>(from.imag());
return PacketXcf(real, imag);
}

template <>
EIGEN_STRONG_INLINE PacketXcf padd<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(padd<PacketXf>(a.real, b.real), padd<PacketXf>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf psub<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(psub<PacketXf>(a.real, b.real), psub<PacketXf>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf pnegate(const PacketXcf& a) {
return PacketXcf(pnegate<PacketXf>(a.real), pnegate<PacketXf>(a.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf pconj(const PacketXcf& a) {
return PacketXcf(
a.real, __riscv_vreinterpret_v_u32m1_f32m1(__riscv_vxor_vx_u32m1(__riscv_vreinterpret_v_f32m1_u32m1(a.imag),
0x80000000, unpacket_traits<PacketXf>::size)));
}
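pconj above negates only the imaginary lanes, and it does so by XOR-ing the IEEE-754 sign bit through an integer reinterpret instead of multiplying by -1. A hedged scalar model of the same bit trick (illustrative, not part of the patch):

// Flip the sign bit of a float with an integer XOR; 0x80000000 is the same
// constant used in the vector code above.
#include <cstdint>
#include <cstring>

float negate_via_signbit(float y) {
  std::uint32_t bits;
  std::memcpy(&bits, &y, sizeof bits);
  bits ^= 0x80000000u;
  std::memcpy(&y, &bits, sizeof y);
  return y;
}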
template <>
EIGEN_STRONG_INLINE PacketXcf pmul<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
PacketXf v1 = pmul<PacketXf>(a.real, b.real);
PacketXf v2 = pmul<PacketXf>(a.imag, b.imag);
PacketXf v3 = pmul<PacketXf>(a.real, b.imag);
PacketXf v4 = pmul<PacketXf>(a.imag, b.real);
return PacketXcf(psub<PacketXf>(v1, v2), padd<PacketXf>(v3, v4));
}

template <>
EIGEN_STRONG_INLINE PacketXcf pmadd<PacketXcf>(const PacketXcf& a, const PacketXcf& b, const PacketXcf& c) {
PacketXf v1 = pmadd<PacketXf>(a.real, b.real, c.real);
PacketXf v2 = pmul<PacketXf>(a.imag, b.imag);
PacketXf v3 = pmadd<PacketXf>(a.real, b.imag, c.imag);
PacketXf v4 = pmul<PacketXf>(a.imag, b.real);
return PacketXcf(psub<PacketXf>(v1, v2), padd<PacketXf>(v3, v4));
}
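Both kernels above are the componentwise expansion of complex multiplication; pmadd merely folds the accumulator into the two FMA operands (v1 and v3). Written out:

$$ (a_r + i a_i)(b_r + i b_i) + (c_r + i c_i) = (a_r b_r - a_i b_i + c_r) + i\,(a_r b_i + a_i b_r + c_i) $$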
template <>
EIGEN_STRONG_INLINE PacketXcf pcmp_eq(const PacketXcf& a, const PacketXcf& b) {
PacketMask32 eq_both = pand<PacketMask32>(pcmp_eq_mask(a.real, b.real), pcmp_eq_mask(a.imag, b.imag));
PacketXf res = pselect(eq_both, ptrue<PacketXf>(a.real), pzero<PacketXf>(a.real));
return PacketXcf(res, res);
}

template <>
EIGEN_STRONG_INLINE PacketXcf pand<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(pand<PacketXf>(a.real, b.real), pand<PacketXf>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf por<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(por<PacketXf>(a.real, b.real), por<PacketXf>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf pxor<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(pxor<PacketXf>(a.real, b.real), pxor<PacketXf>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf pandnot<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
return PacketXcf(pandnot<PacketXf>(a.real, b.real), pandnot<PacketXf>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf pload<PacketXcf>(const std::complex<float>* from) {
vfloat32m1x2_t res = __riscv_vlseg2e32_v_f32m1x2((const float*)from, unpacket_traits<PacketXf>::size);
EIGEN_DEBUG_ALIGNED_LOAD return PacketXcf(__riscv_vget_v_f32m1x2_f32m1(res, 0), __riscv_vget_v_f32m1x2_f32m1(res, 1));
}

template <>
EIGEN_STRONG_INLINE PacketXcf ploadu<PacketXcf>(const std::complex<float>* from) {
vfloat32m1x2_t res = __riscv_vlseg2e32_v_f32m1x2((const float*)from, unpacket_traits<PacketXf>::size);
EIGEN_DEBUG_UNALIGNED_LOAD return PacketXcf(__riscv_vget_v_f32m1x2_f32m1(res, 0),
__riscv_vget_v_f32m1x2_f32m1(res, 1));
}

template <>
EIGEN_STRONG_INLINE PacketXcf ploaddup<PacketXcf>(const std::complex<float>* from) {
PacketXu real_idx = __riscv_vid_v_u32m1(unpacket_traits<PacketXf>::size);
real_idx = __riscv_vsll_vx_u32m1(__riscv_vand_vx_u32m1(real_idx, 0xfffffffeu, unpacket_traits<PacketXf>::size), 2,
unpacket_traits<PacketXf>::size);
PacketXu imag_idx = __riscv_vadd_vx_u32m1(real_idx, sizeof(float), unpacket_traits<PacketXf>::size);
// real_idx = 0 0 2*sizeof(float) 2*sizeof(float) 4*sizeof(float) 4*sizeof(float) ...
return PacketXcf(__riscv_vloxei32_v_f32m1((const float*)from, real_idx, unpacket_traits<PacketXf>::size),
__riscv_vloxei32_v_f32m1((const float*)from, imag_idx, unpacket_traits<PacketXf>::size));
}

template <>
EIGEN_STRONG_INLINE PacketXcf ploadquad<PacketXcf>(const std::complex<float>* from) {
PacketXu real_idx = __riscv_vid_v_u32m1(unpacket_traits<PacketXf>::size);
real_idx = __riscv_vsll_vx_u32m1(__riscv_vand_vx_u32m1(real_idx, 0xfffffffcu, unpacket_traits<PacketXf>::size), 1,
unpacket_traits<PacketXf>::size);
PacketXu imag_idx = __riscv_vadd_vx_u32m1(real_idx, sizeof(float), unpacket_traits<PacketXf>::size);
// real_idx = 0 0 0 0 2*sizeof(float) 2*sizeof(float) 2*sizeof(float) 2*sizeof(float) ...
return PacketXcf(__riscv_vloxei32_v_f32m1((const float*)from, real_idx, unpacket_traits<PacketXf>::size),
__riscv_vloxei32_v_f32m1((const float*)from, imag_idx, unpacket_traits<PacketXf>::size));
}

template <>
EIGEN_STRONG_INLINE void pstore<std::complex<float> >(std::complex<float>* to, const PacketXcf& from) {
vfloat32m1x2_t vx2 = __riscv_vundefined_f32m1x2();
vx2 = __riscv_vset_v_f32m1_f32m1x2(vx2, 0, from.real);
vx2 = __riscv_vset_v_f32m1_f32m1x2(vx2, 1, from.imag);
EIGEN_DEBUG_ALIGNED_STORE __riscv_vsseg2e32_v_f32m1x2((float*)to, vx2, unpacket_traits<PacketXcf>::size);
}

template <>
EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const PacketXcf& from) {
vfloat32m1x2_t vx2 = __riscv_vundefined_f32m1x2();
vx2 = __riscv_vset_v_f32m1_f32m1x2(vx2, 0, from.real);
vx2 = __riscv_vset_v_f32m1_f32m1x2(vx2, 1, from.imag);
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vsseg2e32_v_f32m1x2((float*)to, vx2, unpacket_traits<PacketXf>::size);
}

template <>
EIGEN_DEVICE_FUNC inline PacketXcf pgather<std::complex<float>, PacketXcf>(const std::complex<float>* from,
Index stride) {
vfloat32m1x2_t res =
__riscv_vlsseg2e32_v_f32m1x2((const float*)from, 2 * stride * sizeof(float), unpacket_traits<PacketXf>::size);
return PacketXcf(__riscv_vget_v_f32m1x2_f32m1(res, 0), __riscv_vget_v_f32m1x2_f32m1(res, 1));
}

template <>
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, PacketXcf>(std::complex<float>* to, const PacketXcf& from,
Index stride) {
vfloat32m1x2_t from_rvv_type = __riscv_vundefined_f32m1x2();
from_rvv_type = __riscv_vset_v_f32m1_f32m1x2(from_rvv_type, 0, from.real);
from_rvv_type = __riscv_vset_v_f32m1_f32m1x2(from_rvv_type, 1, from.imag);
__riscv_vssseg2e32_v_f32m1x2((float*)to, 2 * stride * sizeof(float), from_rvv_type, unpacket_traits<PacketXf>::size);
}

template <>
EIGEN_STRONG_INLINE std::complex<float> pfirst<PacketXcf>(const PacketXcf& a) {
return std::complex<float>(pfirst<PacketXf>(a.real), pfirst<PacketXf>(a.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf preverse(const PacketXcf& a) {
return PacketXcf(preverse<PacketXf>(a.real), preverse<PacketXf>(a.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf pcplxflip<PacketXcf>(const PacketXcf& a) {
return PacketXcf(a.imag, a.real);
}

template <>
EIGEN_STRONG_INLINE std::complex<float> predux<PacketXcf>(const PacketXcf& a) {
return std::complex<float>(predux<PacketXf>(a.real), predux<PacketXf>(a.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcf pdiv<PacketXcf>(const PacketXcf& a, const PacketXcf& b) {
PacketXcf b_conj = pconj<PacketXcf>(b);
PacketXcf dividend = pmul<PacketXcf>(a, b_conj);
PacketXf divider = psub<PacketXf>(pmul<PacketXf>(b.real, b_conj.real), pmul<PacketXf>(b.imag, b_conj.imag));
return PacketXcf(pdiv<PacketXf>(dividend.real, divider), pdiv<PacketXf>(dividend.imag, divider));
}
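The division above uses the conjugate form. Since b_conj.imag is -b.imag, the "divider" expression b.real * b_conj.real - b.imag * b_conj.imag is exactly the squared modulus of b:

$$ \frac{a}{b} = \frac{a\,\overline{b}}{|b|^2}, \qquad |b|^2 = b_r \overline{b}_r - b_i \overline{b}_i = b_r^2 + b_i^2 $$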
template <int N>
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXcf, N>& kernel) {
float buffer_real[unpacket_traits<PacketXf>::size * N];
float buffer_imag[unpacket_traits<PacketXf>::size * N];
int i = 0;

for (i = 0; i < N; i++) {
__riscv_vsse32(&buffer_real[i], N * sizeof(float), kernel.packet[i].real, unpacket_traits<PacketXf>::size);
__riscv_vsse32(&buffer_imag[i], N * sizeof(float), kernel.packet[i].imag, unpacket_traits<PacketXf>::size);
}

for (i = 0; i < N; i++) {
kernel.packet[i].real =
__riscv_vle32_v_f32m1(&buffer_real[i * unpacket_traits<PacketXf>::size], unpacket_traits<PacketXf>::size);
kernel.packet[i].imag =
__riscv_vle32_v_f32m1(&buffer_imag[i * unpacket_traits<PacketXf>::size], unpacket_traits<PacketXf>::size);
}
}
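ptranspose above round-trips through a scratch buffer: each packet's lanes are stored with a stride of N, then N contiguous rows are reloaded, which realizes the transpose of the N x VL block separately for the real and imaginary planes. A hedged plain-array model of the same idea (illustrative only, not the intrinsic code):

// Strided write into a scratch buffer: element (r, c) of the input lands at
// scratch[c * rows + r], so reading scratch row by row yields the transpose.
#include <cstddef>

void transpose_via_scratch(const float* in, float* scratch, std::size_t rows, std::size_t cols) {
  for (std::size_t r = 0; r < rows; ++r)
    for (std::size_t c = 0; c < cols; ++c)
      scratch[c * rows + r] = in[r * cols + c];
}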
template <typename Packet>
EIGEN_STRONG_INLINE Packet psqrt_complex_rvv(const Packet& a) {
typedef typename unpacket_traits<Packet>::type Scalar;
typedef typename Scalar::value_type RealScalar;
typedef typename packet_traits<RealScalar>::type RealPacket;
typedef typename unpacket_traits<RealPacket>::packet_mask PacketMask;

// Computes the principal sqrt of the complex numbers in the input.
//
// For example, for packets containing 2 complex numbers stored in
// [real0, real1, imag0, imag1] format
// a = [a0, a1] = [x0, x1, y0, y1],
// where x0 = real(a0), y0 = imag(a0) etc., this function returns
// b = [b0, b1] = [u0, u1, v0, v1],
// such that b0^2 = a0, b1^2 = a1.
//
// To derive the formula for the complex square roots, let's consider the equation for
// a single complex square root of the number x + i*y. We want to find real numbers
// u and v such that
// (u + i*v)^2 = x + i*y <=>
// u^2 - v^2 + i*2*u*v = x + i*y.
// By equating the real and imaginary parts we get:
// u^2 - v^2 = x
// 2*u*v = y.
//
// For x >= 0, this has the numerically stable solution
// u = sqrt(0.5 * (x + sqrt(x^2 + y^2)))
// v = 0.5 * (y / u)
// and for x < 0,
// v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))
// u = 0.5 * (y / v)
//
// To avoid unnecessary over- and underflow, we compute sqrt(x^2 + y^2) as
// l = max(|x|, |y|) * sqrt(1 + (min(|x|, |y|) / max(|x|, |y|))^2) ,

// In the following, without loss of generality, we have annotated the code, assuming
// that the input is a packet of 2 complex numbers.
//
// Step 1. Compute l = [l0, l1], where
// l0 = sqrt(x0^2 + y0^2), l1 = sqrt(x1^2 + y1^2)
// To avoid over- and underflow, we use the stable formula for each hypotenuse
// l0 = (min0 == 0 ? max0 : max0 * sqrt(1 + (min0/max0)**2)),
// where max0 = max(|x0|, |y0|), min0 = min(|x0|, |y0|), and similarly for l1.

Packet a_abs = Packet(pabs(a.real), pabs(a.imag));
RealPacket a_max = pmax(a_abs.real, a_abs.imag);
RealPacket a_min = pmin(a_abs.real, a_abs.imag);

PacketMask a_min_zero_mask = pcmp_eq_mask(a_min, pzero(a_min));
PacketMask a_max_zero_mask = pcmp_eq_mask(a_max, pzero(a_max));
RealPacket r = pdiv(a_min, a_max);

const RealPacket cst_one = pset1<RealPacket>(RealScalar(1));
const RealPacket cst_true = ptrue<RealPacket>(cst_one);
RealPacket l = pmul(a_max, psqrt(padd(cst_one, pmul(r, r))));
// Set l to a_max if a_min is zero.
l = pselect(a_min_zero_mask, a_max, l);

// Step 2. Compute [rho0, rho1], where
// rho0 = sqrt(0.5 * (l0 + |x0|)), rho1 = sqrt(0.5 * (l1 + |x1|))
// We don't care about the imaginary parts computed here. They will be overwritten later.
const RealPacket cst_half = pset1<RealPacket>(RealScalar(0.5));
RealPacket rho = psqrt(pmul(cst_half, padd(a_abs.real, l)));

// Step 3. Compute [rho0, rho1, eta0, eta1], where
// eta0 = (y0 / rho0) / 2, and eta1 = (y1 / rho1) / 2.
// set eta = 0 if input is 0 + i0.
RealPacket eta = pselect(a_max_zero_mask, pzero<RealPacket>(cst_one), pmul(cst_half, pdiv(a.imag, rho)));
// Compute result for inputs with positive real part.
Packet positive_real_result = Packet(rho, eta);

// Step 4. Compute solution for inputs with negative real part:
// [|eta0| |eta1|, sign(y0)*rho0, sign(y1)*rho1]
const RealPacket cst_imag_sign_mask = pset1<RealPacket>(RealScalar(-0.0));
RealPacket imag_signs = pand(a.imag, cst_imag_sign_mask);
Packet negative_real_result = Packet(pabs(eta), por(rho, imag_signs));

// Step 5. Select solution branch based on the sign of the real parts.
PacketMask negative_real_mask_half = pcmp_lt_mask(a.real, pzero(a.real));
Packet result = Packet(pselect(negative_real_mask_half, negative_real_result.real, positive_real_result.real),
pselect(negative_real_mask_half, negative_real_result.imag, positive_real_result.imag));

// Step 6. Handle special cases for infinities:
// * If z is (x,+∞), the result is (+∞,+∞) even if x is NaN
// * If z is (x,-∞), the result is (+∞,-∞) even if x is NaN
// * If z is (-∞,y), the result is (0*|y|,+∞) for finite or NaN y
// * If z is (+∞,y), the result is (+∞,0*|y|) for finite or NaN y
const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
PacketMask is_real_inf = pcmp_eq_mask(a_abs.real, cst_pos_inf);
// prepare packet of (+∞,0*|y|) or (0*|y|,+∞), depending on the sign of the infinite real part.
const Packet cst_one_zero = pset1<Packet>(Scalar(RealScalar(1.0), RealScalar(0.0)));
Packet real_inf_result = Packet(pmul(a_abs.real, cst_one_zero.real), pmul(a_abs.imag, cst_one_zero.imag));
real_inf_result = Packet(pselect(negative_real_mask_half, real_inf_result.imag, real_inf_result.real),
pselect(negative_real_mask_half, real_inf_result.real, real_inf_result.imag));
// prepare packet of (+∞,+∞) or (+∞,-∞), depending on the sign of the infinite imaginary part.
PacketMask is_imag_inf = pcmp_eq_mask(a_abs.imag, cst_pos_inf);
// unless otherwise specified, if either the real or imaginary component is nan, the entire result is nan
result = Packet(pselect(pcmp_eq_mask(result.real, result.real), result.real, cst_true),
pselect(pcmp_eq_mask(result.imag, result.imag), result.imag, cst_true));

result = Packet(pselect(is_real_inf, real_inf_result.real, result.real),
pselect(is_real_inf, real_inf_result.imag, result.imag));

return Packet(pselect(is_imag_inf, cst_pos_inf, result.real), pselect(is_imag_inf, a.imag, result.imag));
}
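For reference, a hedged scalar transcription of the branch formulas quoted in the comment block above (std::complex based, without the packet code's NaN/infinity special-casing; illustrative only):

// Principal square root of x + i*y via the numerically stable u/v formulas.
#include <cmath>
#include <complex>

std::complex<float> sqrt_ref(std::complex<float> z) {
  float x = z.real(), y = z.imag();
  float l = std::hypot(x, y);  // sqrt(x^2 + y^2) without intermediate overflow
  if (x >= 0.0f) {
    float u = std::sqrt(0.5f * (x + l));
    float v = (u == 0.0f) ? 0.0f : 0.5f * (y / u);
    return {u, v};
  }
  float v = std::copysign(std::sqrt(0.5f * (-x + l)), y);
  float u = 0.5f * (y / v);
  return {u, v};
}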
template <typename Packet>
EIGEN_STRONG_INLINE Packet plog_complex_rvv(const Packet& x) {
typedef typename unpacket_traits<Packet>::type Scalar;
typedef typename Scalar::value_type RealScalar;
typedef typename packet_traits<RealScalar>::type RealPacket;
typedef typename unpacket_traits<RealPacket>::packet_mask PacketMask;

// log(sqrt(a^2 + b^2)), atan2(b, a)
RealPacket xlogr = plog(psqrt(padd(pmul<RealPacket>(x.real, x.real), pmul<RealPacket>(x.imag, x.imag))));
RealPacket ximg = patan2(x.imag, x.real);

const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
RealPacket r_abs = pabs(x.real);
RealPacket i_abs = pabs(x.imag);
PacketMask is_r_pos_inf = pcmp_eq_mask(r_abs, cst_pos_inf);
PacketMask is_i_pos_inf = pcmp_eq_mask(i_abs, cst_pos_inf);
PacketMask is_any_inf = por(is_r_pos_inf, is_i_pos_inf);
RealPacket xreal = pselect(is_any_inf, cst_pos_inf, xlogr);

return Packet(xreal, ximg);
}
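The identity implemented above, with the real part forced to +inf whenever either input component is infinite (the mask logic at the end):

$$ \log(x + i\,y) = \log\sqrt{x^2 + y^2} + i\,\operatorname{atan2}(y, x) $$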
template <>
EIGEN_STRONG_INLINE PacketXcf psqrt<PacketXcf>(const PacketXcf& a) {
return psqrt_complex_rvv<PacketXcf>(a);
}

template <>
EIGEN_STRONG_INLINE PacketXcf plog<PacketXcf>(const PacketXcf& a) {
return plog_complex_rvv<PacketXcf>(a);
}

template <>
struct conj_helper<PacketMul2Xf, PacketXcf, false, false> {
EIGEN_STRONG_INLINE PacketXcf pmadd(const PacketMul2Xf& x, const PacketXcf& y, const PacketXcf& c) const {
return padd(c, this->pmul(x, y));
}
EIGEN_STRONG_INLINE PacketXcf pmul(const PacketMul2Xf& x, const PacketXcf& y) const {
return PacketXcf(Eigen::internal::pmul<PacketMul2Xf>(x, pcast<PacketXcf, PacketMul2Xf>(y)));
}
};

template <>
struct conj_helper<PacketXcf, PacketMul2Xf, false, false> {
EIGEN_STRONG_INLINE PacketXcf pmadd(const PacketXcf& x, const PacketMul2Xf& y, const PacketXcf& c) const {
return padd(c, this->pmul(x, y));
}
EIGEN_STRONG_INLINE PacketXcf pmul(const PacketXcf& x, const PacketMul2Xf& y) const {
return PacketXcf(Eigen::internal::pmul<PacketMul2Xf>(pcast<PacketXcf, PacketMul2Xf>(x), y));
}
};

/********************************* double ************************************/

struct PacketXcd {
EIGEN_STRONG_INLINE PacketXcd() {}
EIGEN_STRONG_INLINE explicit PacketXcd(const PacketXd& _real, const PacketXd& _imag) : real(_real), imag(_imag) {}
EIGEN_STRONG_INLINE explicit PacketXcd(const PacketMul2Xd& a)
: real(__riscv_vget_v_f64m2_f64m1(a, 0)), imag(__riscv_vget_v_f64m2_f64m1(a, 1)) {}
PacketXd real;
PacketXd imag;
};

template <int LMul>
struct packet_traits<std::complex<double>, LMul> : default_packet_traits {
typedef PacketXcd type;
typedef PacketXcd half;
enum {
Vectorizable = 1,
AlignedOnScalar = 0,
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 1>::size,

HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
HasSqrt = 1,
HasSign = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasLog = 0,
HasSetLinear = 0
};
};

template <>
struct unpacket_traits<PacketXcd> {
typedef std::complex<double> type;
typedef PacketXcd half;
typedef PacketMul2Xd as_real;
enum {
size = rvv_packet_size_selector<double, EIGEN_RISCV64_RVV_VL, 1>::size,
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
vectorizable = true,
masked_load_available = false,
masked_store_available = false
};
};

template <>
EIGEN_STRONG_INLINE PacketXcd pcast<PacketMul2Xd, PacketXcd>(const PacketMul2Xd& a) {
return PacketXcd(a);
}

template <>
EIGEN_STRONG_INLINE PacketMul2Xd pcast<PacketXcd, PacketMul2Xd>(const PacketXcd& a) {
return __riscv_vcreate_v_f64m1_f64m2(a.real, a.imag);
}

template <>
EIGEN_STRONG_INLINE PacketXcd pset1<PacketXcd>(const std::complex<double>& from) {
PacketXd real = pset1<PacketXd>(from.real());
PacketXd imag = pset1<PacketXd>(from.imag());
return PacketXcd(real, imag);
}

template <>
EIGEN_STRONG_INLINE PacketXcd padd<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(padd<PacketXd>(a.real, b.real), padd<PacketXd>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd psub<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(psub<PacketXd>(a.real, b.real), psub<PacketXd>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pnegate(const PacketXcd& a) {
return PacketXcd(pnegate<PacketXd>(a.real), pnegate<PacketXd>(a.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pconj(const PacketXcd& a) {
return PacketXcd(
a.real, __riscv_vreinterpret_v_u64m1_f64m1(__riscv_vxor_vx_u64m1(
__riscv_vreinterpret_v_f64m1_u64m1(a.imag), 0x8000000000000000, unpacket_traits<PacketXd>::size)));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pmul<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
PacketXd v1 = pmul<PacketXd>(a.real, b.real);
PacketXd v2 = pmul<PacketXd>(a.imag, b.imag);
PacketXd v3 = pmul<PacketXd>(a.real, b.imag);
PacketXd v4 = pmul<PacketXd>(a.imag, b.real);
return PacketXcd(psub<PacketXd>(v1, v2), padd<PacketXd>(v3, v4));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pmadd<PacketXcd>(const PacketXcd& a, const PacketXcd& b, const PacketXcd& c) {
PacketXd v1 = pmadd<PacketXd>(a.real, b.real, c.real);
PacketXd v2 = pmul<PacketXd>(a.imag, b.imag);
PacketXd v3 = pmadd<PacketXd>(a.real, b.imag, c.imag);
PacketXd v4 = pmul<PacketXd>(a.imag, b.real);
return PacketXcd(psub<PacketXd>(v1, v2), padd<PacketXd>(v3, v4));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pcmp_eq(const PacketXcd& a, const PacketXcd& b) {
PacketMask64 eq_both = pand<PacketMask64>(pcmp_eq_mask(a.real, b.real), pcmp_eq_mask(a.imag, b.imag));
PacketXd res = pselect(eq_both, ptrue<PacketXd>(a.real), pzero<PacketXd>(a.real));
return PacketXcd(res, res);
}

template <>
EIGEN_STRONG_INLINE PacketXcd pand<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(pand<PacketXd>(a.real, b.real), pand<PacketXd>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd por<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(por<PacketXd>(a.real, b.real), por<PacketXd>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pxor<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(pxor<PacketXd>(a.real, b.real), pxor<PacketXd>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pandnot<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
return PacketXcd(pandnot<PacketXd>(a.real, b.real), pandnot<PacketXd>(a.imag, b.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pload<PacketXcd>(const std::complex<double>* from) {
vfloat64m1x2_t res = __riscv_vlseg2e64_v_f64m1x2((const double*)from, unpacket_traits<PacketXd>::size);
EIGEN_DEBUG_ALIGNED_LOAD return PacketXcd(__riscv_vget_v_f64m1x2_f64m1(res, 0), __riscv_vget_v_f64m1x2_f64m1(res, 1));
}

template <>
EIGEN_STRONG_INLINE PacketXcd ploadu<PacketXcd>(const std::complex<double>* from) {
vfloat64m1x2_t res = __riscv_vlseg2e64_v_f64m1x2((const double*)from, unpacket_traits<PacketXd>::size);
EIGEN_DEBUG_UNALIGNED_LOAD return PacketXcd(__riscv_vget_v_f64m1x2_f64m1(res, 0),
__riscv_vget_v_f64m1x2_f64m1(res, 1));
}

template <>
EIGEN_STRONG_INLINE PacketXcd ploaddup<PacketXcd>(const std::complex<double>* from) {
PacketXul real_idx = __riscv_vid_v_u64m1(unpacket_traits<PacketXd>::size);
real_idx =
__riscv_vsll_vx_u64m1(__riscv_vand_vx_u64m1(real_idx, 0xfffffffffffffffeu, unpacket_traits<PacketXd>::size), 3,
unpacket_traits<PacketXd>::size);
PacketXul imag_idx = __riscv_vadd_vx_u64m1(real_idx, sizeof(double), unpacket_traits<PacketXd>::size);
// real_idx = 0 0 2*sizeof(double) 2*sizeof(double) 4*sizeof(double) 4*sizeof(double) ...
return PacketXcd(__riscv_vloxei64_v_f64m1((const double*)from, real_idx, unpacket_traits<PacketXd>::size),
__riscv_vloxei64_v_f64m1((const double*)from, imag_idx, unpacket_traits<PacketXd>::size));
}

template <>
EIGEN_STRONG_INLINE PacketXcd ploadquad<PacketXcd>(const std::complex<double>* from) {
PacketXul real_idx = __riscv_vid_v_u64m1(unpacket_traits<PacketXd>::size);
real_idx =
__riscv_vsll_vx_u64m1(__riscv_vand_vx_u64m1(real_idx, 0xfffffffffffffffcu, unpacket_traits<PacketXd>::size), 2,
unpacket_traits<PacketXd>::size);
PacketXul imag_idx = __riscv_vadd_vx_u64m1(real_idx, sizeof(double), unpacket_traits<PacketXd>::size);
// real_idx = 0 0 0 0 2*sizeof(double) 2*sizeof(double) 2*sizeof(double) 2*sizeof(double) ...
return PacketXcd(__riscv_vloxei64_v_f64m1((const double*)from, real_idx, unpacket_traits<PacketXd>::size),
__riscv_vloxei64_v_f64m1((const double*)from, imag_idx, unpacket_traits<PacketXd>::size));
}

template <>
EIGEN_STRONG_INLINE void pstore<std::complex<double> >(std::complex<double>* to, const PacketXcd& from) {
vfloat64m1x2_t vx2 = __riscv_vundefined_f64m1x2();
vx2 = __riscv_vset_v_f64m1_f64m1x2(vx2, 0, from.real);
vx2 = __riscv_vset_v_f64m1_f64m1x2(vx2, 1, from.imag);
EIGEN_DEBUG_ALIGNED_STORE __riscv_vsseg2e64_v_f64m1x2((double*)to, vx2, unpacket_traits<PacketXcd>::size);
}

template <>
EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double>* to, const PacketXcd& from) {
vfloat64m1x2_t vx2 = __riscv_vundefined_f64m1x2();
vx2 = __riscv_vset_v_f64m1_f64m1x2(vx2, 0, from.real);
vx2 = __riscv_vset_v_f64m1_f64m1x2(vx2, 1, from.imag);
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vsseg2e64_v_f64m1x2((double*)to, vx2, unpacket_traits<PacketXd>::size);
}

template <>
EIGEN_DEVICE_FUNC inline PacketXcd pgather<std::complex<double>, PacketXcd>(const std::complex<double>* from,
Index stride) {
vfloat64m1x2_t res =
__riscv_vlsseg2e64_v_f64m1x2((const double*)from, 2 * stride * sizeof(double), unpacket_traits<PacketXd>::size);
return PacketXcd(__riscv_vget_v_f64m1x2_f64m1(res, 0), __riscv_vget_v_f64m1x2_f64m1(res, 1));
}

template <>
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, PacketXcd>(std::complex<double>* to, const PacketXcd& from,
Index stride) {
vfloat64m1x2_t from_rvv_type = __riscv_vundefined_f64m1x2();
from_rvv_type = __riscv_vset_v_f64m1_f64m1x2(from_rvv_type, 0, from.real);
from_rvv_type = __riscv_vset_v_f64m1_f64m1x2(from_rvv_type, 1, from.imag);
__riscv_vssseg2e64_v_f64m1x2((double*)to, 2 * stride * sizeof(double), from_rvv_type,
unpacket_traits<PacketXd>::size);
}

template <>
EIGEN_STRONG_INLINE std::complex<double> pfirst<PacketXcd>(const PacketXcd& a) {
return std::complex<double>(pfirst<PacketXd>(a.real), pfirst<PacketXd>(a.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd preverse(const PacketXcd& a) {
return PacketXcd(preverse<PacketXd>(a.real), preverse<PacketXd>(a.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pcplxflip<PacketXcd>(const PacketXcd& a) {
return PacketXcd(a.imag, a.real);
}

template <>
EIGEN_STRONG_INLINE std::complex<double> predux<PacketXcd>(const PacketXcd& a) {
return std::complex<double>(predux<PacketXd>(a.real), predux<PacketXd>(a.imag));
}

template <>
EIGEN_STRONG_INLINE PacketXcd pdiv<PacketXcd>(const PacketXcd& a, const PacketXcd& b) {
PacketXcd b_conj = pconj<PacketXcd>(b);
PacketXcd dividend = pmul<PacketXcd>(a, b_conj);
PacketXd divider = psub<PacketXd>(pmul<PacketXd>(b.real, b_conj.real), pmul<PacketXd>(b.imag, b_conj.imag));
return PacketXcd(pdiv<PacketXd>(dividend.real, divider), pdiv<PacketXd>(dividend.imag, divider));
}

template <int N>
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXcd, N>& kernel) {
double buffer_real[unpacket_traits<PacketXd>::size * N];
double buffer_imag[unpacket_traits<PacketXd>::size * N];
int i = 0;

for (i = 0; i < N; i++) {
__riscv_vsse64(&buffer_real[i], N * sizeof(double), kernel.packet[i].real, unpacket_traits<PacketXd>::size);
__riscv_vsse64(&buffer_imag[i], N * sizeof(double), kernel.packet[i].imag, unpacket_traits<PacketXd>::size);
}

for (i = 0; i < N; i++) {
kernel.packet[i].real =
__riscv_vle64_v_f64m1(&buffer_real[i * unpacket_traits<PacketXd>::size], unpacket_traits<PacketXd>::size);
kernel.packet[i].imag =
__riscv_vle64_v_f64m1(&buffer_imag[i * unpacket_traits<PacketXd>::size], unpacket_traits<PacketXd>::size);
}
}

template <>
EIGEN_STRONG_INLINE PacketXcd psqrt<PacketXcd>(const PacketXcd& a) {
return psqrt_complex_rvv<PacketXcd>(a);
}

template <>
EIGEN_STRONG_INLINE PacketXcd plog<PacketXcd>(const PacketXcd& a) {
return plog_complex_rvv<PacketXcd>(a);
}

template <>
struct conj_helper<PacketMul2Xd, PacketXcd, false, false> {
EIGEN_STRONG_INLINE PacketXcd pmadd(const PacketMul2Xd& x, const PacketXcd& y, const PacketXcd& c) const {
return padd(c, this->pmul(x, y));
}
EIGEN_STRONG_INLINE PacketXcd pmul(const PacketMul2Xd& x, const PacketXcd& y) const {
return PacketXcd(Eigen::internal::pmul<PacketMul2Xd>(x, pcast<PacketXcd, PacketMul2Xd>(y)));
}
};

template <>
struct conj_helper<PacketXcd, PacketMul2Xd, false, false> {
EIGEN_STRONG_INLINE PacketXcd pmadd(const PacketXcd& x, const PacketMul2Xd& y, const PacketXcd& c) const {
return padd(c, this->pmul(x, y));
}
EIGEN_STRONG_INLINE PacketXcd pmul(const PacketXcd& x, const PacketMul2Xd& y) const {
return PacketXcd(Eigen::internal::pmul<PacketMul2Xd>(pcast<PacketXcd, PacketMul2Xd>(x), y));
}
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_COMPLEX_RVV10_H
Eigen/src/Core/arch/RVV10/GeneralBlockPanelKernel.h (new file, 491 lines)
@@ -0,0 +1,491 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2024 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H
#define EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H
#include "../../InternalHeaderCheck.h"

namespace Eigen {
namespace internal {

/********************************* real ************************************/

template <>
struct gebp_traits<float, float, false, false, Architecture::RVV10, GEBPPacketFull>
: gebp_traits<float, float, false, false, Architecture::Generic, GEBPPacketFull> {
typedef float RhsPacket;
typedef QuadPacket<float> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}

EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }

EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}

EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }

EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f32m1(a, b, c, unpacket_traits<AccPacket>::size);
}

template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
c = __riscv_vfmadd_vf_f32m1(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
}
};
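In this specialization the RHS "packet" is a plain scalar, so each GEBP micro-kernel update is one vector-scalar FMA (vfmadd.vf): the scalar b is broadcast against the whole LHS packet. A hedged per-lane model of what madd computes (illustrative only):

// Per-lane model of the madd() above: b is a broadcast scalar, a and c are lanes.
void madd_model(const float* a, float b, float* c, int vl) {
  for (int i = 0; i < vl; ++i) c[i] = a[i] * b + c[i];
}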
template <>
struct gebp_traits<double, double, false, false, Architecture::RVV10, GEBPPacketFull>
: gebp_traits<double, double, false, false, Architecture::Generic, GEBPPacketFull> {
typedef double RhsPacket;
typedef QuadPacket<double> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}

EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }

EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}

EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }

EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f64m1(a, b, c, unpacket_traits<AccPacket>::size);
}

template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
c = __riscv_vfmadd_vf_f64m1(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
}
};

#if defined(EIGEN_VECTORIZE_RVV10FP16)

template <>
struct gebp_traits<half, half, false, false, Architecture::RVV10>
: gebp_traits<half, half, false, false, Architecture::Generic> {
typedef half RhsPacket;
typedef PacketXh LhsPacket;
typedef PacketXh AccPacket;
typedef QuadPacket<half> RhsPacketx4;

EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}

EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }

EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}

EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = pload<RhsPacket>(b); }

EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f16m1(a, b, c, unpacket_traits<AccPacket>::size);
}

template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
c = __riscv_vfmadd_vf_f16m1(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
}
};

#endif

/********************************* complex ************************************/

#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size) \
typedef typename packet_conditional< \
packet_size, typename packet_traits<name##Scalar>::type, typename packet_traits<name##Scalar>::half, \
typename unpacket_traits<typename packet_traits<name##Scalar>::half>::half>::type name##Packet##postfix

#define RISCV_COMPLEX_PACKET_DECL_COND_SCALAR(packet_size) \
typedef typename packet_conditional< \
packet_size, typename packet_traits<Scalar>::type, typename packet_traits<Scalar>::half, \
typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type ScalarPacket

template <typename RealScalar, bool ConjLhs_, bool ConjRhs_, int PacketSize_>
struct gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, ConjLhs_, ConjRhs_, Architecture::RVV10,
PacketSize_> : gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, ConjLhs_, ConjRhs_,
Architecture::Generic, PacketSize_> {
typedef std::complex<RealScalar> Scalar;
typedef std::complex<RealScalar> LhsScalar;
typedef std::complex<RealScalar> RhsScalar;
typedef std::complex<RealScalar> ResScalar;
typedef typename packet_traits<std::complex<RealScalar>>::type RealPacket;

PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
RISCV_COMPLEX_PACKET_DECL_COND_SCALAR(PacketSize_);
#undef RISCV_COMPLEX_PACKET_DECL_COND_SCALAR

enum {
ConjLhs = ConjLhs_,
ConjRhs = ConjRhs_,
Vectorizable = unpacket_traits<RealPacket>::vectorizable && unpacket_traits<ScalarPacket>::vectorizable,
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
RhsPacketSize = Vectorizable ? unpacket_traits<RhsScalar>::size : 1,
RealPacketSize = Vectorizable ? unpacket_traits<RealPacket>::size : 1,

nr = 4,
mr = ResPacketSize,

LhsProgress = ResPacketSize,
RhsProgress = 1
};

typedef DoublePacket<RealPacket> DoublePacketType;

typedef std::conditional_t<Vectorizable, ScalarPacket, Scalar> LhsPacket4Packing;
typedef std::conditional_t<Vectorizable, RealPacket, Scalar> LhsPacket;
typedef std::conditional_t<Vectorizable, DoublePacket<RealScalar>, Scalar> RhsPacket;
typedef std::conditional_t<Vectorizable, ScalarPacket, Scalar> ResPacket;
typedef std::conditional_t<Vectorizable, DoublePacketType, Scalar> AccPacket;

typedef QuadPacket<RhsPacket> RhsPacketx4;

EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }

EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p) {
p.first = pset1<RealPacket>(RealScalar(0));
p.second = pset1<RealPacket>(RealScalar(0));
}

// Scalar path
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ScalarPacket& dest) const { dest = pset1<ScalarPacket>(*b); }

// Vectorized path
template <typename RealPacketType>
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket<RealPacketType>& dest) const {
dest.first = pset1<RealPacketType>(numext::real(*b));
dest.second = pset1<RealPacketType>(numext::imag(*b));
}

EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
loadRhs(b, dest.B_0);
loadRhs(b + 1, dest.B1);
loadRhs(b + 2, dest.B2);
loadRhs(b + 3, dest.B3);
}

// Scalar path
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, ScalarPacket& dest) const { loadRhs(b, dest); }

// Vectorized path
template <typename RealPacketType>
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, DoublePacket<RealPacketType>& dest) const {
loadRhs(b, dest);
}

EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}

EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacket<RealScalar>& dest) const {
loadQuadToDoublePacket(b, dest);
}

// nothing special here
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const {
dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
}

template <typename LhsPacketType>
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
dest = ploadu<LhsPacketType>((const typename unpacket_traits<LhsPacketType>::type*)(a));
}

EIGEN_STRONG_INLINE PacketXcf pmadd_scalar(const PacketXcf& a, float b, const PacketXcf& c) const {
PacketXf v1 = __riscv_vfmadd_vf_f32m1(a.real, b, c.real, unpacket_traits<PacketXf>::size);
PacketXf v4 = __riscv_vfmadd_vf_f32m1(a.imag, b, c.imag, unpacket_traits<PacketXf>::size);
return PacketXcf(v1, v4);
}

EIGEN_STRONG_INLINE PacketXcd pmadd_scalar(const PacketXcd& a, double b, const PacketXcd& c) const {
PacketXd v1 = __riscv_vfmadd_vf_f64m1(a.real, b, c.real, unpacket_traits<PacketXd>::size);
PacketXd v4 = __riscv_vfmadd_vf_f64m1(a.imag, b, c.imag, unpacket_traits<PacketXd>::size);
return PacketXcd(v1, v4);
}

template <typename LhsPacketType, typename RhsPacketType, typename ResPacketType, typename TmpType,
typename LaneIdType>
EIGEN_STRONG_INLINE std::enable_if_t<!is_same<RhsPacketType, RhsPacketx4>::value> madd(const LhsPacketType& a,
const RhsPacketType& b,
DoublePacket<ResPacketType>& c,
TmpType& /*tmp*/,
const LaneIdType&) const {
c.first = pmadd_scalar(a, b.first, c.first);
c.second = pmadd_scalar(a, b.second, c.second);
}

template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
const LaneIdType& lane) const {
madd(a, b.get(lane), c, tmp, lane);
}

template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/,
const LaneIdType&) const {
c = cj.pmadd(a, b, c);
}

protected:
conj_helper<LhsScalar, RhsScalar, ConjLhs, ConjRhs> cj;
};
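In the complex-times-complex traits above, loadRhs broadcasts one complex RHS coefficient as two real packets (dest.first holds the real part, dest.second the imaginary part), and madd keeps the two partial sums separate so that each update is just two real vector-scalar FMAs; the multiplication by i is presumably deferred to the generic DoublePacket accumulation path this specialization inherits. The identity being exploited, for a complex LHS packet a:

$$ \sum_k a_k\,b_k \;=\; \underbrace{\sum_k a_k\,b_{k,r}}_{\texttt{c.first}} \;+\; i\,\underbrace{\sum_k a_k\,b_{k,i}}_{\texttt{c.second}} $$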
#define PACKET_DECL_COND_SCALAR_POSTFIX(postfix, packet_size) \
|
||||
typedef typename packet_conditional< \
|
||||
packet_size, typename packet_traits<Scalar>::type, typename packet_traits<Scalar>::half, \
|
||||
typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type ScalarPacket##postfix
|
||||
|
||||
template <typename RealScalar, bool ConjRhs_, int PacketSize_>
|
||||
class gebp_traits<RealScalar, std::complex<RealScalar>, false, ConjRhs_, Architecture::RVV10, PacketSize_>
|
||||
: public gebp_traits<RealScalar, std::complex<RealScalar>, false, ConjRhs_, Architecture::Generic, PacketSize_> {
|
||||
public:
|
||||
typedef std::complex<RealScalar> Scalar;
|
||||
typedef RealScalar LhsScalar;
|
||||
typedef Scalar RhsScalar;
|
||||
typedef Scalar ResScalar;
|
||||
PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
|
||||
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
|
||||
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
|
||||
PACKET_DECL_COND_POSTFIX(_, Real, PacketSize_);
|
||||
PACKET_DECL_COND_SCALAR_POSTFIX(_, PacketSize_);
|
||||
#undef PACKET_DECL_COND_SCALAR_POSTFIX
|
||||
|
||||
enum {
|
||||
ConjLhs = false,
|
||||
ConjRhs = ConjRhs_,
|
||||
Vectorizable = unpacket_traits<RealPacket_>::vectorizable && unpacket_traits<ScalarPacket_>::vectorizable,
|
||||
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
|
||||
RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
|
||||
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
|
||||
|
||||
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
|
||||
// FIXME: should depend on NumberOfRegisters
|
||||
nr = 4,
|
||||
mr = (plain_enum_min(16, NumberOfRegisters) / 2 / nr) * ResPacketSize,
|
||||
|
||||
LhsProgress = ResPacketSize,
|
||||
RhsProgress = 1
|
||||
};
|
||||
|
||||
typedef std::conditional_t<Vectorizable, LhsPacket_, LhsScalar> LhsPacket;
|
||||
typedef RhsScalar RhsPacket;
|
||||
typedef std::conditional_t<Vectorizable, ResPacket_, ResScalar> ResPacket;
|
||||
typedef LhsPacket LhsPacket4Packing;
|
||||
typedef QuadPacket<RhsPacket> RhsPacketx4;
|
||||
typedef ResPacket AccPacket;
|
||||
|
||||
EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { p = pset1<ResPacket>(ResScalar(0)); }
|
||||
|
||||
template <typename RhsPacketType>
|
||||
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const {
|
||||
dest = pset1<RhsPacketType>(*b);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
|
||||
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
|
||||
}
|
||||
|
||||
template <typename RhsPacketType>
|
||||
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const {
|
||||
loadRhs(b, dest);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
|
||||
|
||||
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = pload<LhsPacket>(a); }
|
||||
|
||||
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }
|
||||
|
||||
template <typename LhsPacketType>
|
||||
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
|
||||
dest = ploadu<LhsPacketType>((const typename unpacket_traits<LhsPacketType>::type*)a);
|
||||
}
|
||||
|
||||
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>
|
||||
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp,
|
||||
const LaneIdType&) const {
|
||||
madd_impl(a, b, c, tmp, std::conditional_t<Vectorizable, true_type, false_type>());
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE PacketXcf pmadd_scalar(const PacketXf& a, std::complex<float> b, const PacketXcf& c) const {
|
||||
PacketXf v1 = __riscv_vfmadd_vf_f32m1(a, b.real(), c.real, unpacket_traits<PacketXf>::size);
|
||||
PacketXf v3 = __riscv_vfmadd_vf_f32m1(a, b.imag(), c.imag, unpacket_traits<PacketXf>::size);
|
||||
return PacketXcf(v1, v3);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE PacketXcd pmadd_scalar(const PacketXd& a, std::complex<double> b, const PacketXcd& c) const {
|
||||
PacketXd v1 = __riscv_vfmadd_vf_f64m1(a, b.real(), c.real, unpacket_traits<PacketXd>::size);
|
||||
PacketXd v3 = __riscv_vfmadd_vf_f64m1(a, b.imag(), c.imag, unpacket_traits<PacketXd>::size);
|
||||
return PacketXcd(v1, v3);
|
||||
}
|
||||
|
||||
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c,
|
||||
RhsPacketType& tmp, const true_type&) const {
|
||||
EIGEN_UNUSED_VARIABLE(tmp);
|
||||
c = pmadd_scalar(a, b, c);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/,
|
||||
const false_type&) const {
|
||||
c += a * b;
|
||||
}
|
||||
|
||||
template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
|
||||
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
|
||||
const LaneIdType& lane) const {
|
||||
madd(a, b.get(lane), c, tmp, lane);
|
||||
}
|
||||
|
||||
template <typename ResPacketType, typename AccPacketType>
|
||||
EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const {
|
||||
conj_helper<ResPacketType, ResPacketType, false, ConjRhs> cj;
|
||||
r = cj.pmadd(alpha, c, r);
|
||||
}
|
||||
};

template <typename RealScalar, bool ConjLhs_, int PacketSize_>
class gebp_traits<std::complex<RealScalar>, RealScalar, ConjLhs_, false, Architecture::RVV10, PacketSize_>
: public gebp_traits<RealScalar, std::complex<RealScalar>, ConjLhs_, false, Architecture::Generic, PacketSize_> {
public:
typedef std::complex<RealScalar> LhsScalar;
typedef RealScalar RhsScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;

PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
#undef PACKET_DECL_COND_POSTFIX

enum {
ConjLhs = ConjLhs_,
ConjRhs = false,
Vectorizable = unpacket_traits<LhsPacket_>::vectorizable && unpacket_traits<RhsPacket_>::vectorizable,
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,

nr = 4,
mr = 3 * LhsPacketSize,

LhsProgress = LhsPacketSize,
RhsProgress = 1
};

typedef std::conditional_t<Vectorizable, LhsPacket_, LhsScalar> LhsPacket;
typedef RhsScalar RhsPacket;
typedef std::conditional_t<Vectorizable, ResPacket_, ResScalar> ResPacket;
typedef LhsPacket LhsPacket4Packing;

typedef QuadPacket<RhsPacket> RhsPacketx4;

typedef ResPacket AccPacket;

EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { p = pset1<ResPacket>(ResScalar(0)); }

template <typename RhsPacketType>
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const {
dest = pset1<RhsPacketType>(*b);
}

EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}

template <typename RhsPacketType>
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const {
loadRhs(b, dest);
}

EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}

EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const {
loadRhsQuad_impl(b, dest, std::conditional_t<RhsPacketSize == 16, true_type, false_type>());
}

EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const true_type&) const {
// FIXME we can do better!
// what we want here is a ploadheight
RhsScalar tmp[4] = {b[0], b[0], b[1], b[1]};
dest = ploadquad<RhsPacket>(tmp);
}

EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const false_type&) const {
eigen_internal_assert(RhsPacketSize <= 8);
dest = pset1<RhsPacket>(*b);
}

EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = pload<LhsPacket>(a); }

template <typename LhsPacketType>
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
dest = ploadu<LhsPacketType>(a);
}

template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp,
const LaneIdType&) const {
madd_impl(a, b, c, tmp, std::conditional_t<Vectorizable, true_type, false_type>());
}

EIGEN_STRONG_INLINE PacketXcf pmadd_scalar(const PacketXcf& a, float b, const PacketXcf& c) const {
PacketXf v1 = __riscv_vfmadd_vf_f32m1(a.real, b, c.real, unpacket_traits<PacketXf>::size);
PacketXf v3 = __riscv_vfmadd_vf_f32m1(a.imag, b, c.imag, unpacket_traits<PacketXf>::size);
return PacketXcf(v1, v3);
}

EIGEN_STRONG_INLINE PacketXcd pmadd_scalar(const PacketXcd& a, double b, const PacketXcd& c) const {
PacketXd v1 = __riscv_vfmadd_vf_f64m1(a.real, b, c.real, unpacket_traits<PacketXd>::size);
PacketXd v3 = __riscv_vfmadd_vf_f64m1(a.imag, b, c.imag, unpacket_traits<PacketXd>::size);
return PacketXcd(v1, v3);
}

template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c,
RhsPacketType& tmp, const true_type&) const {
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd_scalar(a, b, c);
}

EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/,
const false_type&) const {
c += a * b;
}

template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
const LaneIdType& lane) const {
madd(a, b.get(lane), c, tmp, lane);
}

template <typename ResPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const {
conj_helper<ResPacketType, ResPacketType, ConjLhs, false> cj;
r = cj.pmadd(c, alpha, r);
}
};

} // namespace internal
} // namespace Eigen

#endif // EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H
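// Illustrative user-level code, not part of the patch (ordinary Eigen usage,
// shown only for orientation): a mixed complex * real product is the kind of
// expression that dispatches into the
// gebp_traits<std::complex<RealScalar>, RealScalar, ...> specialization above.
inline Eigen::MatrixXcf mixed_product(const Eigen::MatrixXcf& A, const Eigen::MatrixXf& B) {
  return A * B;  // complex lhs, real rhs: the rhs packets stay real-valued
}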
30
Eigen/src/Core/arch/RVV10/MathFunctions.h
Normal file
@ -0,0 +1,30 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2024 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_MATH_FUNCTIONS_RVV10_H
#define EIGEN_MATH_FUNCTIONS_RVV10_H

// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {
namespace internal {

EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PacketXf)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PacketMul2Xf)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PacketMul4Xf)

EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PacketXd)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PacketMul2Xd)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PacketMul4Xd)

} // end namespace internal
} // end namespace Eigen

#endif // EIGEN_MATH_FUNCTIONS_RVV10_H
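// Illustrative user-level code, not part of the patch: once the generic math
// kernels are instantiated for the RVV packet types above, element-wise
// transcendentals on float arrays can vectorize through pexp/plog/psin/... for
// PacketXf and its higher-LMUL variants.
inline Eigen::ArrayXf softplus_example(const Eigen::ArrayXf& x) {
  return (1.0f + x.exp()).log();  // exercises pexp and plog on the packets above
}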
5180
Eigen/src/Core/arch/RVV10/PacketMath.h
Normal file
File diff suppressed because it is too large
917
Eigen/src/Core/arch/RVV10/PacketMathFP16.h
Normal file
@ -0,0 +1,917 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2025 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_PACKET_MATH_FP16_RVV10_H
#define EIGEN_PACKET_MATH_FP16_RVV10_H

// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {
namespace internal {

typedef vfloat16m1_t PacketXh __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL)));
typedef vfloat16m2_t PacketMul2Xh __attribute__((riscv_rvv_vector_bits(EIGEN_RISCV64_RVV_VL * 2)));

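// Note on the typedefs above (descriptive; the build requirement is an
// assumption, not stated in this hunk): riscv_rvv_vector_bits turns the
// otherwise sizeless RVV types into fixed-size types of EIGEN_RISCV64_RVV_VL
// bits (scaled by LMUL), which is what allows Eigen to store them in packet
// structs; EIGEN_RISCV64_RVV_VL is presumably expected to match the vector
// length the code is compiled for (e.g. the -mrvv-vector-bits setting).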
template <>
|
||||
struct packet_traits<Eigen::half> : default_packet_traits {
|
||||
typedef PacketXh type;
|
||||
typedef PacketXh half;
|
||||
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = rvv_packet_size_selector<Eigen::half, EIGEN_RISCV64_RVV_VL, 1>::size,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
HasMul = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 1,
|
||||
HasArg = 0,
|
||||
HasAbs2 = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasReduxp = 0,
|
||||
|
||||
HasCmp = 1,
|
||||
HasDiv = 1,
|
||||
HasFloor = 1,
|
||||
HasRint = 1,
|
||||
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasErf = 0
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct packet_traits<Eigen::half, 2> : default_packet_traits {
|
||||
typedef PacketMul2Xh type;
|
||||
typedef PacketXh half;
|
||||
|
||||
enum {
|
||||
Vectorizable = 1,
|
||||
AlignedOnScalar = 1,
|
||||
size = rvv_packet_size_selector<Eigen::half, EIGEN_RISCV64_RVV_VL, 2>::size,
|
||||
|
||||
HasAdd = 1,
|
||||
HasSub = 1,
|
||||
HasShift = 1,
|
||||
HasMul = 1,
|
||||
HasNegate = 1,
|
||||
HasAbs = 1,
|
||||
HasArg = 0,
|
||||
HasAbs2 = 1,
|
||||
HasMin = 1,
|
||||
HasMax = 1,
|
||||
HasConj = 1,
|
||||
HasSetLinear = 0,
|
||||
HasBlend = 0,
|
||||
HasReduxp = 0,
|
||||
|
||||
HasCmp = 1,
|
||||
HasDiv = 1,
|
||||
HasFloor = 1,
|
||||
HasRint = 1,
|
||||
|
||||
HasSin = EIGEN_FAST_MATH,
|
||||
HasCos = EIGEN_FAST_MATH,
|
||||
HasLog = 0,
|
||||
HasExp = 0,
|
||||
HasSqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
HasErf = 0
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<PacketXh> {
|
||||
typedef Eigen::half type;
|
||||
typedef PacketXh half; // Half not yet implemented
|
||||
typedef PacketXs integer_packet;
|
||||
typedef numext::uint8_t mask_t;
|
||||
|
||||
enum {
|
||||
size = rvv_packet_size_selector<Eigen::half, EIGEN_RISCV64_RVV_VL, 1>::size,
|
||||
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 1>::alignment,
|
||||
vectorizable = true,
|
||||
masked_load_available = false,
|
||||
masked_store_available = false
|
||||
};
|
||||
};
|
||||
|
||||
template <>
|
||||
struct unpacket_traits<PacketMul2Xh> {
|
||||
typedef Eigen::half type;
|
||||
typedef PacketXh half;
|
||||
typedef PacketMul2Xs integer_packet;
|
||||
typedef numext::uint8_t mask_t;
|
||||
|
||||
enum {
|
||||
size = rvv_packet_size_selector<Eigen::half, EIGEN_RISCV64_RVV_VL, 2>::size,
|
||||
alignment = rvv_packet_alignment_selector<EIGEN_RISCV64_RVV_VL, 2>::alignment,
|
||||
vectorizable = true,
|
||||
masked_load_available = false,
|
||||
masked_store_available = false
|
||||
};
|
||||
};
|
||||
|
||||
/********************************* PacketXh ************************************/
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh ptrue<PacketXh>(const PacketXh& /*a*/) {
|
||||
return __riscv_vreinterpret_f16m1(__riscv_vmv_v_x_u16m1(0xffffu, unpacket_traits<PacketXh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pzero<PacketXh>(const PacketXh& /*a*/) {
|
||||
return __riscv_vfmv_v_f_f16m1(static_cast<Eigen::half>(0.0), unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pabs(const PacketXh& a) {
|
||||
return __riscv_vfabs_v_f16m1(a, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pset1<PacketXh>(const Eigen::half& from) {
|
||||
return __riscv_vfmv_v_f_f16m1(static_cast<_Float16>(from), unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pset1frombits<PacketXh>(numext::uint16_t from) {
|
||||
return __riscv_vreinterpret_f16m1(__riscv_vmv_v_x_u16m1(from, unpacket_traits<PacketXh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh plset<PacketXh>(const Eigen::half& a) {
|
||||
PacketXh idx =
|
||||
__riscv_vfcvt_f_x_v_f16m1(__riscv_vid_v_i16m1(unpacket_traits<PacketXs>::size), unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vfadd_vf_f16m1(idx, a, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh padd<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfadd_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh psub<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfsub_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pnegate(const PacketXh& a) {
|
||||
return __riscv_vfneg_v_f16m1(a, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pconj(const PacketXh& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmul<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfmul_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pdiv<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfdiv_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmadd(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
|
||||
return __riscv_vfmadd_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmsub(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
|
||||
return __riscv_vfmsub_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pnmadd(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
|
||||
return __riscv_vfnmsub_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pnmsub(const PacketXh& a, const PacketXh& b, const PacketXh& c) {
|
||||
return __riscv_vfnmadd_vv_f16m1(a, b, c, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmin<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketXh nans =
|
||||
__riscv_vfmv_v_f_f16m1((std::numeric_limits<Eigen::half>::quiet_NaN)(), unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask2 = __riscv_vmfeq_vv_f16m1_b16(b, b, unpacket_traits<PacketXh>::size);
|
||||
mask = __riscv_vmand_mm_b16(mask, mask2, unpacket_traits<PacketXh>::size);
|
||||
|
||||
return __riscv_vfmin_vv_f16m1_tum(mask, nans, a, b, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmin<PropagateNaN, PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return pmin<PacketXh>(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmin<PropagateNumbers, PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfmin_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmax<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketXh nans =
|
||||
__riscv_vfmv_v_f_f16m1((std::numeric_limits<Eigen::half>::quiet_NaN)(), unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
|
||||
PacketMask16 mask2 = __riscv_vmfeq_vv_f16m1_b16(b, b, unpacket_traits<PacketXh>::size);
|
||||
mask = __riscv_vmand_mm_b16(mask, mask2, unpacket_traits<PacketXh>::size);
|
||||
|
||||
return __riscv_vfmax_vv_f16m1_tum(mask, nans, a, b, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmax<PropagateNaN, PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return pmax<PacketXh>(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pmax<PropagateNumbers, PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vfmax_vv_f16m1(a, b, unpacket_traits<PacketXh>::size);
|
||||
}
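// Scalar reference for the NaN-propagating pmin/pmax variants above
// (illustrative sketch, not part of the patch; assumes <cmath>, <limits> and
// <algorithm> are available): the vector code builds a mask of lanes where
// both inputs are ordered (x == x rules out NaN), pre-fills the result with
// quiet NaN, and lets the masked vfmin/vfmax overwrite only the ordered lanes.
static inline float pmin_propagate_nan_ref(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) return std::numeric_limits<float>::quiet_NaN();
  return std::min(a, b);
}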
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcmp_le<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketMask16 mask = __riscv_vmfle_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcmp_lt<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketMask16 mask = __riscv_vmflt_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcmp_eq<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketMask16 mask = __riscv_vmfeq_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(pzero<PacketXh>(a), ptrue<PacketXh>(a), mask, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcmp_lt_or_nan<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
PacketMask16 mask = __riscv_vmfge_vv_f16m1_b16(a, b, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vfmerge_vfm_f16m1(ptrue<PacketXh>(a), static_cast<Eigen::half>(0.0), mask,
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
// Logical Operations are not supported for half, so reinterpret casts
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pand<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vand_vv_u16m1(
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh por<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vor_vv_u16m1(
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pxor<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vxor_vv_u16m1(
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a), __riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pandnot<PacketXh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vreinterpret_v_u16m1_f16m1(__riscv_vand_vv_u16m1(
|
||||
__riscv_vreinterpret_v_f16m1_u16m1(a),
|
||||
__riscv_vnot_v_u16m1(__riscv_vreinterpret_v_f16m1_u16m1(b), unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pload<PacketXh>(const Eigen::half* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle16_v_f16m1(reinterpret_cast<const _Float16*>(from),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh ploadu<PacketXh>(const Eigen::half* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle16_v_f16m1(reinterpret_cast<const _Float16*>(from),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh ploaddup<PacketXh>(const Eigen::half* from) {
|
||||
PacketXsu idx = __riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size);
|
||||
idx = __riscv_vand_vx_u16m1(idx, 0xfffeu, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vloxei16_v_f16m1(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<PacketXh>::size);
|
||||
}
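// Descriptive note on the indexed loads above and below (based on the RVV
// intrinsic semantics, not stated in this hunk): vloxei16 treats each index as
// a byte offset from `from`, so masking vid with 0xfffe yields offsets
// 0,0,2,2,4,4,... and, with 2-byte half elements, ploaddup reads
// from[0],from[0],from[1],from[1],...; ploadquad builds offsets
// 0,0,0,0,2,2,2,2,... the same way to repeat every element four times.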
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh ploadquad<PacketXh>(const Eigen::half* from) {
|
||||
PacketXsu idx = __riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size);
|
||||
idx = __riscv_vsrl_vx_u16m1(__riscv_vand_vx_u16m1(idx, 0xfffcu, unpacket_traits<PacketXh>::size), 1,
|
||||
unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vloxei16_v_f16m1(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const PacketXh& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse16_v_f16m1(reinterpret_cast<_Float16*>(to), from,
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const PacketXh& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse16_v_f16m1(reinterpret_cast<_Float16*>(to), from,
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline PacketXh pgather<Eigen::half, PacketXh>(const Eigen::half* from, Index stride) {
|
||||
return __riscv_vlse16_v_f16m1(reinterpret_cast<const _Float16*>(from), stride * sizeof(Eigen::half),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, PacketXh>(Eigen::half* to, const PacketXh& from, Index stride) {
|
||||
__riscv_vsse16(reinterpret_cast<_Float16*>(to), stride * sizeof(Eigen::half), from, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half pfirst<PacketXh>(const PacketXh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f_s_f16m1_f16(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh psqrt(const PacketXh& a) {
|
||||
return __riscv_vfsqrt_v_f16m1(a, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh print<PacketXh>(const PacketXh& a) {
|
||||
const PacketXh limit = pset1<PacketXh>(static_cast<Eigen::half>(1 << 10));
|
||||
const PacketXh abs_a = pabs(a);
|
||||
|
||||
PacketMask16 mask = __riscv_vmfne_vv_f16m1_b16(a, a, unpacket_traits<PacketXh>::size);
|
||||
const PacketXh x = __riscv_vfadd_vv_f16m1_tum(mask, a, a, a, unpacket_traits<PacketXh>::size);
|
||||
const PacketXh new_x = __riscv_vfcvt_f_x_v_f16m1(__riscv_vfcvt_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
|
||||
mask = __riscv_vmflt_vv_f16m1_b16(abs_a, limit, unpacket_traits<PacketXh>::size);
|
||||
PacketXh signed_x = __riscv_vfsgnj_vv_f16m1(new_x, x, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vmerge_vvm_f16m1(x, signed_x, mask, unpacket_traits<PacketXh>::size);
|
||||
}
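// Scalar sketch of print<PacketXh> above (illustrative, not part of the patch;
// assumes <cmath> is available): half has 10 fraction bits, so any |x| >= 2^10
// is already integral and is passed through (NaN lanes are quieted via x + x);
// smaller values are rounded by a float->int->float round trip in the current
// rounding mode (round-to-nearest-even by default), with the sign copied back
// so that e.g. -0.25 rounds to -0.0.
static inline float print_ref(float x) {
  if (std::isnan(x) || std::fabs(x) >= 1024.0f) return x;
  return std::copysign(std::nearbyint(x), x);
}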
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pfloor<PacketXh>(const PacketXh& a) {
|
||||
PacketXh tmp = print<PacketXh>(a);
|
||||
// If greater, subtract one.
|
||||
PacketMask16 mask = __riscv_vmflt_vv_f16m1_b16(a, tmp, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vfsub_vf_f16m1_tum(mask, tmp, tmp, static_cast<Eigen::half>(1.0), unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh preverse(const PacketXh& a) {
|
||||
PacketXsu idx = __riscv_vrsub_vx_u16m1(__riscv_vid_v_u16m1(unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size - 1, unpacket_traits<PacketXh>::size);
|
||||
return __riscv_vrgather_vv_f16m1(a, idx, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux<PacketXh>(const PacketXh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredusum_vs_f16m1_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1(static_cast<Eigen::half>(0.0), unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_mul<PacketXh>(const PacketXh& a) {
|
||||
// Multiply the vector by its reverse
|
||||
PacketXh prod = __riscv_vfmul_vv_f16m1(preverse(a), a, unpacket_traits<PacketXh>::size);
|
||||
PacketXh half_prod;
|
||||
|
||||
if (EIGEN_RISCV64_RVV_VL >= 1024) {
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 16, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
if (EIGEN_RISCV64_RVV_VL >= 512) {
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 8, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
if (EIGEN_RISCV64_RVV_VL >= 256) {
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 4, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
// Last reduction
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 2, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
|
||||
half_prod = __riscv_vslidedown_vx_f16m1(prod, 1, unpacket_traits<PacketXh>::size);
|
||||
prod = __riscv_vfmul_vv_f16m1(prod, half_prod, unpacket_traits<PacketXh>::size);
|
||||
|
||||
// The reduction is done to the first element.
|
||||
return pfirst(prod);
|
||||
}
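// Descriptive note on predux_mul above: the product is reduced in log2(size)
// steps -- the vector is first multiplied by its own reverse, then repeatedly
// multiplied by a copy of itself slid down by 16, 8, 4, 2 and 1 elements (the
// larger strides are only needed for the wider EIGEN_RISCV64_RVV_VL settings),
// so lane 0 ends up holding the product of all lanes.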
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_min<PacketXh>(const PacketXh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmin_vs_f16m1_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1((std::numeric_limits<Eigen::half>::max)(), unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_max<PacketXh>(const PacketXh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmax_vs_f16m1_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1(-(std::numeric_limits<Eigen::half>::max)(), unpacket_traits<PacketXh>::size),
|
||||
unpacket_traits<PacketXh>::size)));
|
||||
}
|
||||
|
||||
template <int N>
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketXh, N>& kernel) {
|
||||
Eigen::half buffer[unpacket_traits<PacketXh>::size * N];
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
__riscv_vsse16(reinterpret_cast<_Float16*>(&buffer[i]), N * sizeof(Eigen::half), kernel.packet[i],
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
kernel.packet[i] = __riscv_vle16_v_f16m1(reinterpret_cast<_Float16*>(&buffer[i * unpacket_traits<PacketXh>::size]),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE PacketMul2Xf half2float(const PacketXh& a) {
|
||||
return __riscv_vfwcvt_f_f_v_f32m2(a, unpacket_traits<PacketMul2Xf>::size);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE PacketXh float2half(const PacketMul2Xf& a) {
|
||||
return __riscv_vfncvt_f_f_w_f16m1(a, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
/********************************* PacketMul2Xh ************************************/
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh ptrue<PacketMul2Xh>(const PacketMul2Xh& /*a*/) {
|
||||
return __riscv_vreinterpret_f16m2(__riscv_vmv_v_x_u16m2(0xffffu, unpacket_traits<PacketMul2Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pzero<PacketMul2Xh>(const PacketMul2Xh& /*a*/) {
|
||||
return __riscv_vfmv_v_f_f16m2(static_cast<Eigen::half>(0.0), unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pabs(const PacketMul2Xh& a) {
|
||||
return __riscv_vfabs_v_f16m2(a, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pset1<PacketMul2Xh>(const Eigen::half& from) {
|
||||
return __riscv_vfmv_v_f_f16m2(static_cast<_Float16>(from), unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pset1frombits<PacketMul2Xh>(numext::uint16_t from) {
|
||||
return __riscv_vreinterpret_f16m2(__riscv_vmv_v_x_u16m2(from, unpacket_traits<PacketMul2Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh plset<PacketMul2Xh>(const Eigen::half& a) {
|
||||
PacketMul2Xh idx = __riscv_vfcvt_f_x_v_f16m2(__riscv_vid_v_i16m2(unpacket_traits<PacketMul4Xs>::size),
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vfadd_vf_f16m2(idx, a, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh padd<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vfadd_vv_f16m2(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh psub<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vfsub_vv_f16m2(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pnegate(const PacketMul2Xh& a) {
|
||||
return __riscv_vfneg_v_f16m2(a, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pconj(const PacketMul2Xh& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmul<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vfmul_vv_f16m2(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pdiv<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vfdiv_vv_f16m2(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmadd(const PacketMul2Xh& a, const PacketMul2Xh& b, const PacketMul2Xh& c) {
|
||||
return __riscv_vfmadd_vv_f16m2(a, b, c, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmsub(const PacketMul2Xh& a, const PacketMul2Xh& b, const PacketMul2Xh& c) {
|
||||
return __riscv_vfmsub_vv_f16m2(a, b, c, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pnmadd(const PacketMul2Xh& a, const PacketMul2Xh& b, const PacketMul2Xh& c) {
|
||||
return __riscv_vfnmsub_vv_f16m2(a, b, c, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pnmsub(const PacketMul2Xh& a, const PacketMul2Xh& b, const PacketMul2Xh& c) {
|
||||
return __riscv_vfnmadd_vv_f16m2(a, b, c, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmin<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
PacketMul2Xh nans =
|
||||
__riscv_vfmv_v_f_f16m2((std::numeric_limits<Eigen::half>::quiet_NaN)(), unpacket_traits<PacketMul2Xh>::size);
|
||||
PacketMask8 mask = __riscv_vmfeq_vv_f16m2_b8(a, a, unpacket_traits<PacketMul2Xh>::size);
|
||||
PacketMask8 mask2 = __riscv_vmfeq_vv_f16m2_b8(b, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
mask = __riscv_vmand_mm_b8(mask, mask2, unpacket_traits<PacketMul2Xh>::size);
|
||||
|
||||
return __riscv_vfmin_vv_f16m2_tum(mask, nans, a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmin<PropagateNaN, PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return pmin<PacketMul2Xh>(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmin<PropagateNumbers, PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vfmin_vv_f16m2(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmax<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
PacketMul2Xh nans =
|
||||
__riscv_vfmv_v_f_f16m2((std::numeric_limits<Eigen::half>::quiet_NaN)(), unpacket_traits<PacketMul2Xh>::size);
|
||||
PacketMask8 mask = __riscv_vmfeq_vv_f16m2_b8(a, a, unpacket_traits<PacketMul2Xh>::size);
|
||||
PacketMask8 mask2 = __riscv_vmfeq_vv_f16m2_b8(b, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
mask = __riscv_vmand_mm_b8(mask, mask2, unpacket_traits<PacketMul2Xh>::size);
|
||||
|
||||
return __riscv_vfmax_vv_f16m2_tum(mask, nans, a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmax<PropagateNaN, PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return pmax<PacketMul2Xh>(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pmax<PropagateNumbers, PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vfmax_vv_f16m2(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pcmp_le<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
PacketMask8 mask = __riscv_vmfle_vv_f16m2_b8(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vmerge_vvm_f16m2(pzero<PacketMul2Xh>(a), ptrue<PacketMul2Xh>(a), mask,
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pcmp_lt<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
PacketMask8 mask = __riscv_vmflt_vv_f16m2_b8(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vmerge_vvm_f16m2(pzero<PacketMul2Xh>(a), ptrue<PacketMul2Xh>(a), mask,
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pcmp_eq<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
PacketMask8 mask = __riscv_vmfeq_vv_f16m2_b8(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vmerge_vvm_f16m2(pzero<PacketMul2Xh>(a), ptrue<PacketMul2Xh>(a), mask,
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pcmp_lt_or_nan<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
PacketMask8 mask = __riscv_vmfge_vv_f16m2_b8(a, b, unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vfmerge_vfm_f16m2(ptrue<PacketMul2Xh>(a), static_cast<Eigen::half>(0.0), mask,
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
// Logical Operations are not supported for half, so reinterpret casts
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pand<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vreinterpret_v_u16m2_f16m2(__riscv_vand_vv_u16m2(__riscv_vreinterpret_v_f16m2_u16m2(a),
|
||||
__riscv_vreinterpret_v_f16m2_u16m2(b),
|
||||
unpacket_traits<PacketMul2Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh por<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vreinterpret_v_u16m2_f16m2(__riscv_vor_vv_u16m2(__riscv_vreinterpret_v_f16m2_u16m2(a),
|
||||
__riscv_vreinterpret_v_f16m2_u16m2(b),
|
||||
unpacket_traits<PacketMul2Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pxor<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vreinterpret_v_u16m2_f16m2(__riscv_vxor_vv_u16m2(__riscv_vreinterpret_v_f16m2_u16m2(a),
|
||||
__riscv_vreinterpret_v_f16m2_u16m2(b),
|
||||
unpacket_traits<PacketMul2Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pandnot<PacketMul2Xh>(const PacketMul2Xh& a, const PacketMul2Xh& b) {
|
||||
return __riscv_vreinterpret_v_u16m2_f16m2(__riscv_vand_vv_u16m2(
|
||||
__riscv_vreinterpret_v_f16m2_u16m2(a),
|
||||
__riscv_vnot_v_u16m2(__riscv_vreinterpret_v_f16m2_u16m2(b), unpacket_traits<PacketMul2Xh>::size),
|
||||
unpacket_traits<PacketMul2Xh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pload<PacketMul2Xh>(const Eigen::half* from) {
|
||||
EIGEN_DEBUG_ALIGNED_LOAD return __riscv_vle16_v_f16m2(reinterpret_cast<const _Float16*>(from),
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh ploadu<PacketMul2Xh>(const Eigen::half* from) {
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD return __riscv_vle16_v_f16m2(reinterpret_cast<const _Float16*>(from),
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh ploaddup<PacketMul2Xh>(const Eigen::half* from) {
|
||||
PacketMul2Xsu idx = __riscv_vid_v_u16m2(unpacket_traits<PacketMul2Xh>::size);
|
||||
idx = __riscv_vand_vx_u16m2(idx, 0xfffeu, unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vloxei16_v_f16m2(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh ploadquad<PacketMul2Xh>(const Eigen::half* from) {
|
||||
PacketMul2Xsu idx = __riscv_vid_v_u16m2(unpacket_traits<PacketMul2Xh>::size);
|
||||
idx = __riscv_vsrl_vx_u16m2(__riscv_vand_vx_u16m2(idx, 0xfffcu, unpacket_traits<PacketMul2Xh>::size), 1,
|
||||
unpacket_traits<PacketMul2Xs>::size);
|
||||
return __riscv_vloxei16_v_f16m2(reinterpret_cast<const _Float16*>(from), idx, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const PacketMul2Xh& from) {
|
||||
EIGEN_DEBUG_ALIGNED_STORE __riscv_vse16_v_f16m2(reinterpret_cast<_Float16*>(to), from,
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const PacketMul2Xh& from) {
|
||||
EIGEN_DEBUG_UNALIGNED_STORE __riscv_vse16_v_f16m2(reinterpret_cast<_Float16*>(to), from,
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline PacketMul2Xh pgather<Eigen::half, PacketMul2Xh>(const Eigen::half* from, Index stride) {
|
||||
return __riscv_vlse16_v_f16m2(reinterpret_cast<const _Float16*>(from), stride * sizeof(Eigen::half),
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, PacketMul2Xh>(Eigen::half* to, const PacketMul2Xh& from,
|
||||
Index stride) {
|
||||
__riscv_vsse16(reinterpret_cast<_Float16*>(to), stride * sizeof(Eigen::half), from,
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half pfirst<PacketMul2Xh>(const PacketMul2Xh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f_s_f16m2_f16(a));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh psqrt(const PacketMul2Xh& a) {
|
||||
return __riscv_vfsqrt_v_f16m2(a, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh print<PacketMul2Xh>(const PacketMul2Xh& a) {
|
||||
const PacketMul2Xh limit = pset1<PacketMul2Xh>(static_cast<Eigen::half>(1 << 10));
|
||||
const PacketMul2Xh abs_a = pabs(a);
|
||||
|
||||
PacketMask8 mask = __riscv_vmfne_vv_f16m2_b8(a, a, unpacket_traits<PacketMul2Xh>::size);
|
||||
const PacketMul2Xh x = __riscv_vfadd_vv_f16m2_tum(mask, a, a, a, unpacket_traits<PacketMul2Xh>::size);
|
||||
const PacketMul2Xh new_x = __riscv_vfcvt_f_x_v_f16m2(
|
||||
__riscv_vfcvt_x_f_v_i16m2(a, unpacket_traits<PacketMul2Xh>::size), unpacket_traits<PacketMul2Xh>::size);
|
||||
|
||||
mask = __riscv_vmflt_vv_f16m2_b8(abs_a, limit, unpacket_traits<PacketMul2Xh>::size);
|
||||
PacketMul2Xh signed_x = __riscv_vfsgnj_vv_f16m2(new_x, x, unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vmerge_vvm_f16m2(x, signed_x, mask, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pfloor<PacketMul2Xh>(const PacketMul2Xh& a) {
|
||||
PacketMul2Xh tmp = print<PacketMul2Xh>(a);
|
||||
// If greater, subtract one.
|
||||
PacketMask8 mask = __riscv_vmflt_vv_f16m2_b8(a, tmp, unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vfsub_vf_f16m2_tum(mask, tmp, tmp, static_cast<Eigen::half>(1.0), unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh preverse(const PacketMul2Xh& a) {
|
||||
PacketMul2Xsu idx =
|
||||
__riscv_vrsub_vx_u16m2(__riscv_vid_v_u16m2(unpacket_traits<PacketMul2Xh>::size),
|
||||
unpacket_traits<PacketMul2Xh>::size - 1, unpacket_traits<PacketMul2Xh>::size);
|
||||
return __riscv_vrgather_vv_f16m2(a, idx, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux<PacketMul2Xh>(const PacketMul2Xh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredusum_vs_f16m2_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1(static_cast<Eigen::half>(0.0), unpacket_traits<PacketMul2Xh>::size / 4),
|
||||
unpacket_traits<PacketMul2Xh>::size)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_mul<PacketMul2Xh>(const PacketMul2Xh& a) {
|
||||
return predux_mul<PacketXh>(__riscv_vfmul_vv_f16m1(__riscv_vget_v_f16m2_f16m1(a, 0), __riscv_vget_v_f16m2_f16m1(a, 1),
|
||||
unpacket_traits<PacketXh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_min<PacketMul2Xh>(const PacketMul2Xh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmin_vs_f16m2_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1((std::numeric_limits<Eigen::half>::max)(), unpacket_traits<PacketMul2Xh>::size / 4),
|
||||
unpacket_traits<PacketMul2Xh>::size)));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Eigen::half predux_max<PacketMul2Xh>(const PacketMul2Xh& a) {
|
||||
return static_cast<Eigen::half>(__riscv_vfmv_f(__riscv_vfredmax_vs_f16m2_f16m1(
|
||||
a, __riscv_vfmv_v_f_f16m1(-(std::numeric_limits<Eigen::half>::max)(), unpacket_traits<PacketMul2Xh>::size / 4),
|
||||
unpacket_traits<PacketMul2Xh>::size)));
|
||||
}
|
||||
|
||||
template <int N>
|
||||
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<PacketMul2Xh, N>& kernel) {
|
||||
Eigen::half buffer[unpacket_traits<PacketMul2Xh>::size * N];
|
||||
int i = 0;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
__riscv_vsse16(reinterpret_cast<_Float16*>(&buffer[i]), N * sizeof(Eigen::half), kernel.packet[i],
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
kernel.packet[i] =
|
||||
__riscv_vle16_v_f16m2(reinterpret_cast<_Float16*>(&buffer[i * unpacket_traits<PacketMul2Xh>::size]),
|
||||
unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE PacketMul4Xf half2float(const PacketMul2Xh& a) {
|
||||
return __riscv_vfwcvt_f_f_v_f32m4(a, unpacket_traits<PacketMul4Xf>::size);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh float2half(const PacketMul4Xf& a) {
|
||||
return __riscv_vfncvt_f_f_w_f16m2(a, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <typename Packet = PacketMul2Xh>
|
||||
EIGEN_STRONG_INLINE
|
||||
typename std::enable_if<std::is_same<Packet, PacketMul2Xh>::value && (unpacket_traits<PacketMul2Xh>::size % 8) == 0,
|
||||
PacketXh>::type
|
||||
predux_half_dowto4(const PacketMul2Xh& a) {
|
||||
return __riscv_vfadd_vv_f16m1(__riscv_vget_v_f16m2_f16m1(a, 0), __riscv_vget_v_f16m2_f16m1(a, 1),
|
||||
unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, pcos)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, pexp)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, pexpm1)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, plog)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, plog1p)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, plog2)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, preciprocal)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, prsqrt)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, psin)
|
||||
F16_PACKET_FUNCTION(PacketMul2Xf, PacketXh, ptanh)
|
||||
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, pcos)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, pexp)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, pexpm1)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, plog)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, plog1p)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, plog2)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, preciprocal)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, prsqrt)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, psin)
|
||||
F16_PACKET_FUNCTION(PacketMul4Xf, PacketMul2Xh, ptanh)
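// The F16_PACKET_FUNCTION instantiations above are assumed to expand, as in
// the other Eigen backends, to half-precision wrappers that widen to the
// matching float packet, run the float kernel and narrow back -- roughly:
//
//   template <>
//   EIGEN_STRONG_INLINE PacketXh psin<PacketXh>(const PacketXh& x) {
//     return float2half(psin<PacketMul2Xf>(half2float(x)));
//   }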
|
||||
|
||||
/********************************* casting ************************************/
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<_Float16, numext::int16_t> {
|
||||
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<numext::int16_t, _Float16> {
|
||||
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
||||
};
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh pcast<PacketXs, PacketXh>(const PacketXs& a) {
|
||||
return __riscv_vfcvt_f_x_v_f16m1(a, unpacket_traits<PacketXs>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXs pcast<PacketXh, PacketXs>(const PacketXh& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXh preinterpret<PacketXh, PacketXs>(const PacketXs& a) {
|
||||
return __riscv_vreinterpret_v_i16m1_f16m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXs preinterpret<PacketXs, PacketXh>(const PacketXh& a) {
|
||||
return __riscv_vreinterpret_v_f16m1_i16m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pcast<PacketMul2Xs, PacketMul2Xh>(const PacketMul2Xs& a) {
|
||||
return __riscv_vfcvt_f_x_v_f16m2(a, unpacket_traits<PacketMul2Xs>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xs pcast<PacketMul2Xh, PacketMul2Xs>(const PacketMul2Xh& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i16m2(a, unpacket_traits<PacketMul2Xh>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh preinterpret<PacketMul2Xh, PacketMul2Xs>(const PacketMul2Xs& a) {
|
||||
return __riscv_vreinterpret_v_i16m2_f16m2(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xs preinterpret<PacketMul2Xs, PacketMul2Xh>(const PacketMul2Xh& a) {
|
||||
return __riscv_vreinterpret_v_f16m2_i16m2(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xs pcast<PacketXh, PacketMul4Xs>(const PacketXh& a, const PacketXh& b, const PacketXh& c,
|
||||
const PacketXh& d) {
|
||||
return __riscv_vcreate_v_i16m1_i16m4(__riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(b, unpacket_traits<PacketXh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(c, unpacket_traits<PacketXh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(d, unpacket_traits<PacketXh>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pcast<PacketXs, PacketMul2Xh>(const PacketXs& a, const PacketXs& b) {
|
||||
return __riscv_vcreate_v_f16m1_f16m2(__riscv_vfcvt_f_x_v_f16m1(a, unpacket_traits<PacketXs>::size),
|
||||
__riscv_vfcvt_f_x_v_f16m1(b, unpacket_traits<PacketXs>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xh pcast<PacketXh, PacketMul2Xh>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vcreate_v_f16m1_f16m2(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xs pcast<PacketXh, PacketMul2Xs>(const PacketXh& a, const PacketXh& b) {
|
||||
return __riscv_vcreate_v_i16m1_i16m2(__riscv_vfcvt_rtz_x_f_v_i16m1(a, unpacket_traits<PacketXh>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i16m1(b, unpacket_traits<PacketXh>::size));
|
||||
}

} // namespace internal
} // namespace Eigen

#endif // EIGEN_PACKET_MATH_FP16_RVV10_H
284
Eigen/src/Core/arch/RVV10/TypeCasting.h
Normal file
@ -0,0 +1,284 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2024 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_TYPE_CASTING_RVV10_H
#define EIGEN_TYPE_CASTING_RVV10_H

// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {
namespace internal {

/********************************* 32 bits ************************************/

template <>
|
||||
struct type_casting_traits<float, numext::int32_t> {
|
||||
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<numext::int32_t, float> {
|
||||
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
||||
};
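// Illustrative user-level code, not part of the patch: SrcCoeffRatio and
// TgtCoeffRatio of 1 mean one source packet maps to one destination packet, so
// an expression like the one below can use the vectorized pcast/preinterpret
// overloads defined in this file.
inline Eigen::ArrayXi truncate_to_int(const Eigen::ArrayXf& x) {
  return x.cast<int>();  // float -> int32 with truncation, matching vfcvt_rtz
}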
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf pcast<PacketXi, PacketXf>(const PacketXi& a) {
|
||||
return __riscv_vfcvt_f_x_v_f32m1(a, unpacket_traits<PacketXi>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi pcast<PacketXf, PacketXi>(const PacketXf& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i32m1(a, unpacket_traits<PacketXf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXf preinterpret<PacketXf, PacketXi>(const PacketXi& a) {
|
||||
return __riscv_vreinterpret_v_i32m1_f32m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXi preinterpret<PacketXi, PacketXf>(const PacketXf& a) {
|
||||
return __riscv_vreinterpret_v_f32m1_i32m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xf pcast<PacketMul4Xi, PacketMul4Xf>(const PacketMul4Xi& a) {
|
||||
return __riscv_vfcvt_f_x_v_f32m4(a, unpacket_traits<PacketMul4Xi>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xi pcast<PacketMul4Xf, PacketMul4Xi>(const PacketMul4Xf& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i32m4(a, unpacket_traits<PacketMul4Xf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xf preinterpret<PacketMul4Xf, PacketMul4Xi>(const PacketMul4Xi& a) {
|
||||
return __riscv_vreinterpret_v_i32m4_f32m4(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xi preinterpret<PacketMul4Xi, PacketMul4Xf>(const PacketMul4Xf& a) {
|
||||
return __riscv_vreinterpret_v_f32m4_i32m4(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xf pcast<PacketMul2Xi, PacketMul2Xf>(const PacketMul2Xi& a) {
|
||||
return __riscv_vfcvt_f_x_v_f32m2(a, unpacket_traits<PacketMul2Xi>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xi pcast<PacketMul2Xf, PacketMul2Xi>(const PacketMul2Xf& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i32m2(a, unpacket_traits<PacketMul2Xf>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xf preinterpret<PacketMul2Xf, PacketMul2Xi>(const PacketMul2Xi& a) {
|
||||
return __riscv_vreinterpret_v_i32m2_f32m2(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xi preinterpret<PacketMul2Xi, PacketMul2Xf>(const PacketMul2Xf& a) {
|
||||
return __riscv_vreinterpret_v_f32m2_i32m2(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xi pcast<PacketXi, PacketMul4Xi>(const PacketXi& a, const PacketXi& b, const PacketXi& c,
|
||||
const PacketXi& d) {
|
||||
return __riscv_vcreate_v_i32m1_i32m4(a, b, c, d);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xf pcast<PacketXi, PacketMul4Xf>(const PacketXi& a, const PacketXi& b, const PacketXi& c,
|
||||
const PacketXi& d) {
|
||||
return __riscv_vcreate_v_f32m1_f32m4(__riscv_vfcvt_f_x_v_f32m1(a, unpacket_traits<PacketXi>::size),
|
||||
__riscv_vfcvt_f_x_v_f32m1(b, unpacket_traits<PacketXi>::size),
|
||||
__riscv_vfcvt_f_x_v_f32m1(c, unpacket_traits<PacketXi>::size),
|
||||
__riscv_vfcvt_f_x_v_f32m1(d, unpacket_traits<PacketXi>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xf pcast<PacketXf, PacketMul4Xf>(const PacketXf& a, const PacketXf& b, const PacketXf& c,
|
||||
const PacketXf& d) {
|
||||
return __riscv_vcreate_v_f32m1_f32m4(a, b, c, d);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xi pcast<PacketXf, PacketMul4Xi>(const PacketXf& a, const PacketXf& b, const PacketXf& c,
|
||||
const PacketXf& d) {
|
||||
return __riscv_vcreate_v_i32m1_i32m4(__riscv_vfcvt_rtz_x_f_v_i32m1(a, unpacket_traits<PacketXf>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i32m1(b, unpacket_traits<PacketXf>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i32m1(c, unpacket_traits<PacketXf>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i32m1(d, unpacket_traits<PacketXf>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xi pcast<PacketXi, PacketMul2Xi>(const PacketXi& a, const PacketXi& b) {
|
||||
return __riscv_vcreate_v_i32m1_i32m2(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xf pcast<PacketXi, PacketMul2Xf>(const PacketXi& a, const PacketXi& b) {
|
||||
return __riscv_vcreate_v_f32m1_f32m2(__riscv_vfcvt_f_x_v_f32m1(a, unpacket_traits<PacketXi>::size),
|
||||
__riscv_vfcvt_f_x_v_f32m1(b, unpacket_traits<PacketXi>::size));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xf pcast<PacketXf, PacketMul2Xf>(const PacketXf& a, const PacketXf& b) {
|
||||
return __riscv_vcreate_v_f32m1_f32m2(a, b);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xi pcast<PacketXf, PacketMul2Xi>(const PacketXf& a, const PacketXf& b) {
|
||||
return __riscv_vcreate_v_i32m1_i32m2(__riscv_vfcvt_rtz_x_f_v_i32m1(a, unpacket_traits<PacketXf>::size),
|
||||
__riscv_vfcvt_rtz_x_f_v_i32m1(b, unpacket_traits<PacketXf>::size));
|
||||
}
|
||||
|
||||
/********************************* 64 bits ************************************/
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<double, numext::int64_t> {
|
||||
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
||||
};
|
||||
|
||||
template <>
|
||||
struct type_casting_traits<numext::int64_t, double> {
|
||||
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
||||
};
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd pcast<PacketXl, PacketXd>(const PacketXl& a) {
|
||||
return __riscv_vfcvt_f_x_v_f64m1(a, unpacket_traits<PacketXl>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl pcast<PacketXd, PacketXl>(const PacketXd& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i64m1(a, unpacket_traits<PacketXd>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXd preinterpret<PacketXd, PacketXl>(const PacketXl& a) {
|
||||
return __riscv_vreinterpret_v_i64m1_f64m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketXl preinterpret<PacketXl, PacketXd>(const PacketXd& a) {
|
||||
return __riscv_vreinterpret_v_f64m1_i64m1(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xd pcast<PacketMul4Xl, PacketMul4Xd>(const PacketMul4Xl& a) {
|
||||
return __riscv_vfcvt_f_x_v_f64m4(a, unpacket_traits<PacketMul4Xl>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xl pcast<PacketMul4Xd, PacketMul4Xl>(const PacketMul4Xd& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i64m4(a, unpacket_traits<PacketMul4Xd>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xd preinterpret<PacketMul4Xd, PacketMul4Xl>(const PacketMul4Xl& a) {
|
||||
return __riscv_vreinterpret_v_i64m4_f64m4(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xl preinterpret<PacketMul4Xl, PacketMul4Xd>(const PacketMul4Xd& a) {
|
||||
return __riscv_vreinterpret_v_f64m4_i64m4(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xd pcast<PacketMul2Xl, PacketMul2Xd>(const PacketMul2Xl& a) {
|
||||
return __riscv_vfcvt_f_x_v_f64m2(a, unpacket_traits<PacketMul2Xl>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xl pcast<PacketMul2Xd, PacketMul2Xl>(const PacketMul2Xd& a) {
|
||||
return __riscv_vfcvt_rtz_x_f_v_i64m2(a, unpacket_traits<PacketMul2Xd>::size);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xd preinterpret<PacketMul2Xd, PacketMul2Xl>(const PacketMul2Xl& a) {
|
||||
return __riscv_vreinterpret_v_i64m2_f64m2(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul2Xl preinterpret<PacketMul2Xl, PacketMul2Xd>(const PacketMul2Xd& a) {
|
||||
return __riscv_vreinterpret_v_f64m2_i64m2(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE PacketMul4Xl pcast<PacketXl, PacketMul4Xl>(const PacketXl& a, const PacketXl& b, const PacketXl& c,
|
||||
const PacketXl& d) {
|
||||
return __riscv_vcreate_v_i64m1_i64m4(a, b, c, d);
|
||||
;
|
||||
}
|
||||
|
||||
template <>
EIGEN_STRONG_INLINE PacketMul4Xd pcast<PacketXl, PacketMul4Xd>(const PacketXl& a, const PacketXl& b, const PacketXl& c,
                                                               const PacketXl& d) {
  return __riscv_vcreate_v_f64m1_f64m4(__riscv_vfcvt_f_x_v_f64m1(a, unpacket_traits<PacketXl>::size),
                                       __riscv_vfcvt_f_x_v_f64m1(b, unpacket_traits<PacketXl>::size),
                                       __riscv_vfcvt_f_x_v_f64m1(c, unpacket_traits<PacketXl>::size),
                                       __riscv_vfcvt_f_x_v_f64m1(d, unpacket_traits<PacketXl>::size));
}

template <>
EIGEN_STRONG_INLINE PacketMul4Xd pcast<PacketXd, PacketMul4Xd>(const PacketXd& a, const PacketXd& b, const PacketXd& c,
                                                               const PacketXd& d) {
  return __riscv_vcreate_v_f64m1_f64m4(a, b, c, d);
}

template <>
EIGEN_STRONG_INLINE PacketMul4Xl pcast<PacketXd, PacketMul4Xl>(const PacketXd& a, const PacketXd& b, const PacketXd& c,
                                                               const PacketXd& d) {
  return __riscv_vcreate_v_i64m1_i64m4(__riscv_vfcvt_rtz_x_f_v_i64m1(a, unpacket_traits<PacketXd>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i64m1(b, unpacket_traits<PacketXd>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i64m1(c, unpacket_traits<PacketXd>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i64m1(d, unpacket_traits<PacketXd>::size));
}

template <>
EIGEN_STRONG_INLINE PacketMul2Xl pcast<PacketXl, PacketMul2Xl>(const PacketXl& a, const PacketXl& b) {
  return __riscv_vcreate_v_i64m1_i64m2(a, b);
}

template <>
EIGEN_STRONG_INLINE PacketMul2Xd pcast<PacketXl, PacketMul2Xd>(const PacketXl& a, const PacketXl& b) {
  return __riscv_vcreate_v_f64m1_f64m2(__riscv_vfcvt_f_x_v_f64m1(a, unpacket_traits<PacketXl>::size),
                                       __riscv_vfcvt_f_x_v_f64m1(b, unpacket_traits<PacketXl>::size));
}

template <>
EIGEN_STRONG_INLINE PacketMul2Xd pcast<PacketXd, PacketMul2Xd>(const PacketXd& a, const PacketXd& b) {
  return __riscv_vcreate_v_f64m1_f64m2(a, b);
}

template <>
EIGEN_STRONG_INLINE PacketMul2Xl pcast<PacketXd, PacketMul2Xl>(const PacketXd& a, const PacketXd& b) {
  return __riscv_vcreate_v_i64m1_i64m2(__riscv_vfcvt_rtz_x_f_v_i64m1(a, unpacket_traits<PacketXd>::size),
                                       __riscv_vfcvt_rtz_x_f_v_i64m1(b, unpacket_traits<PacketXd>::size));
}

/********************************* 16 bits ************************************/

template <>
EIGEN_STRONG_INLINE PacketMul2Xs pcast<PacketXs, PacketMul2Xs>(const PacketXs& a, const PacketXs& b) {
  return __riscv_vcreate_v_i16m1_i16m2(a, b);
}

template <>
EIGEN_STRONG_INLINE PacketMul4Xs pcast<PacketXs, PacketMul4Xs>(const PacketXs& a, const PacketXs& b, const PacketXs& c,
                                                               const PacketXs& d) {
  return __riscv_vcreate_v_i16m1_i16m4(a, b, c, d);
}

}  // namespace internal
}  // namespace Eigen

#endif  // EIGEN_TYPE_CASTING_RVV10_H
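For readers unfamiliar with the LMUL-grouped packets used above, here is a small usage sketch, not part of the patch. It assumes an RVV10 build where PacketXd and PacketMul4Xd are the m1 and m4 double packets defined in this patch's PacketMath.h; ploadu, pcast and unpacket_traits are existing Eigen internal helpers.

// Illustrative only: widen four LMUL=1 packets into one LMUL=4 packet via the
// pcast specialization above (which wraps __riscv_vcreate_v_f64m1_f64m4).
#include <Eigen/Core>
using namespace Eigen::internal;
PacketMul4Xd load_as_m4(const double* p) {
  constexpr int n = unpacket_traits<PacketXd>::size;  // lanes per m1 packet
  PacketXd a = ploadu<PacketXd>(p);
  PacketXd b = ploadu<PacketXd>(p + n);
  PacketXd c = ploadu<PacketXd>(p + 2 * n);
  PacketXd d = ploadu<PacketXd>(p + 3 * n);
  return pcast<PacketXd, PacketMul4Xd>(a, b, c, d);
}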
@ -111,7 +111,11 @@ struct squared_norm_functor {
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const {
#if defined EIGEN_VECTORIZE_RVV10
    return Packet(pmul(a.real, a.real), pmul(a.imag, a.imag));
#else
    return Packet(pmul(a.v, a.v));
#endif
  }
};
template <typename Scalar>
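The RVV10 branch above reads .real and .imag members instead of the usual single .v member, which suggests the complex packets in this patch's Complex.h keep real and imaginary lanes in two separate vector registers. A minimal sketch of that idea follows; only the member names real/imag come from the code above, everything else is an assumption.

// Sketch only, not the actual definition from Complex.h.
struct PacketXcfSketch {
  PacketXf real;  // real parts of the complex lanes
  PacketXf imag;  // imaginary parts, same lane order
  PacketXcfSketch(const PacketXf& r, const PacketXf& i) : real(r), imag(i) {}
};
// With this layout, squared_norm is a lane-wise multiply on each half,
// exactly as in the RVV10 branch above: Packet(pmul(a.real, a.real), pmul(a.imag, a.imag)).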
@ -38,10 +38,21 @@ template <typename LhsScalar, typename RhsScalar, int PacketSize_ = GEMVPacketFu
class gemv_traits {
  typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;

#ifdef EIGEN_RISCV64_USE_RVV10
#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size)                                                        \
  typedef typename std::conditional_t<                                                                              \
      NumTraits<LhsScalar>::IsComplex || NumTraits<RhsScalar>::IsComplex,                                           \
      typename packet_traits<name##Scalar, 2>::type,                                                                \
      typename gemv_packet_cond<packet_size, typename packet_traits<name##Scalar>::type,                            \
                                typename packet_traits<name##Scalar>::half,                                         \
                                typename unpacket_traits<typename packet_traits<name##Scalar>::half>::half>::type>  \
      name##Packet##postfix
#else
#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size)                                                 \
  typedef typename gemv_packet_cond<                                                                         \
      packet_size, typename packet_traits<name##Scalar>::type, typename packet_traits<name##Scalar>::half,   \
      typename unpacket_traits<typename packet_traits<name##Scalar>::half>::half>::type name##Packet##postfix
#endif

  PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
  PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
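To make the RVV10 branch easier to read, this is roughly what PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_) expands to there (hand-expanded for illustration, not additional code in the patch):

// Complex scalars get an LMUL=2 packet; real scalars keep the usual
// full/half/quarter selection driven by PacketSize_.
typedef typename std::conditional_t<
    NumTraits<LhsScalar>::IsComplex || NumTraits<RhsScalar>::IsComplex,
    typename packet_traits<LhsScalar, 2>::type,
    typename gemv_packet_cond<PacketSize_, typename packet_traits<LhsScalar>::type,
                              typename packet_traits<LhsScalar>::half,
                              typename unpacket_traits<typename packet_traits<LhsScalar>::half>::half>::type>
    LhsPacket_;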
@ -68,6 +68,8 @@
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#elif defined __HVX__ && (__HVX_LENGTH__ == 128)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 128
#elif defined(EIGEN_RISCV64_USE_RVV10)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif
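A quick way to observe the new alignment choice on an RVV10 build (illustrative only, not part of the patch; EIGEN_MAX_ALIGN_BYTES is the existing public macro derived from the ideal value):

// Prints 64 when EIGEN_RISCV64_USE_RVV10 is defined, 16/32/128 otherwise
// depending on the target.
#include <Eigen/Core>
#include <cstdio>
int main() {
  std::printf("EIGEN_IDEAL_MAX_ALIGN_BYTES = %d\n", EIGEN_IDEAL_MAX_ALIGN_BYTES);
  std::printf("EIGEN_MAX_ALIGN_BYTES       = %d\n", EIGEN_MAX_ALIGN_BYTES);
  return 0;
}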
@ -104,7 +106,7 @@
// Only static alignment is really problematic (relies on nonstandard compiler extensions),
// try to keep heap alignment even when we have to disable static alignment.
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || \
                         EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64)
                         EIGEN_ARCH_MIPS || EIGEN_ARCH_LOONGARCH64 || EIGEN_ARCH_RISCV)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
@ -406,14 +408,48 @@ extern "C" {
#define EIGEN_VECTORIZE_SVE
#include <arm_sve.h>

// Since we depend on knowing SVE vector lengths at compile-time, we need
// to ensure a fixed lengths is set
// Since we depend on knowing SVE vector length at compile-time, we need
// to ensure a fixed length is set
#if defined __ARM_FEATURE_SVE_BITS
#define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
#else
#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
#endif

#elif defined(EIGEN_ARCH_RISCV)

#if defined(__riscv_zfh)
#define EIGEN_HAS_BUILTIN_FLOAT16
#endif

// We currently require RVV to be enabled explicitly via EIGEN_RISCV64_USE_RVV10 and
// will not select the backend automatically
#if (defined EIGEN_RISCV64_USE_RVV10)

#define EIGEN_VECTORIZE
#define EIGEN_VECTORIZE_RVV10
#include <riscv_vector.h>

// Since we depend on knowing RVV vector length at compile-time, we need
// to ensure a fixed length is set
#if defined(__riscv_v_fixed_vlen)
#define EIGEN_RISCV64_RVV_VL __riscv_v_fixed_vlen
#if __riscv_v_fixed_vlen >= 256
#undef EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#endif
#else
#error "Eigen requires a fixed RVV vector length but -mrvv-vector-bits=zvl is not set."
#endif

#if defined(__riscv_zvfh) && defined(__riscv_zfh)
#define EIGEN_VECTORIZE_RVV10FP16
#elif defined(__riscv_zvfh)
#error "The Eigen::Half vectorization requires Zfh and Zvfh extensions."
#endif

#endif  // defined(EIGEN_ARCH_RISCV)

#elif (defined __s390x__ && defined __VEC__)

#define EIGEN_VECTORIZE
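For context, the backend is opt-in: it only activates when EIGEN_RISCV64_USE_RVV10 is defined and the compiler fixes the vector length. A minimal smoke test under those assumptions (the exact toolchain name, -march string and vector length below are assumptions; the macros are the ones defined in this hunk):

// Build with something like:
//   riscv64-unknown-linux-gnu-g++ -O2 -march=rv64gcv_zvl256b \
//       -mrvv-vector-bits=zvl -DEIGEN_RISCV64_USE_RVV10 smoke.cpp
#include <Eigen/Core>
#include <cstdio>
int main() {
#if defined(EIGEN_VECTORIZE_RVV10)
  std::printf("RVV10 backend enabled, fixed VL = %d bits\n", EIGEN_RISCV64_RVV_VL);
#else
  std::printf("RVV10 backend not enabled\n");
#endif
  return 0;
}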
@ -475,6 +475,7 @@ enum Type {
  SVE = 0x6,
  HVX = 0x7,
  LSX = 0x8,
  RVV10 = 0x9,
#if defined EIGEN_VECTORIZE_SSE
  Target = SSE
#elif defined EIGEN_VECTORIZE_ALTIVEC
@ -491,6 +492,8 @@ enum Type {
  Target = HVX
#elif defined EIGEN_VECTORIZE_LSX
  Target = LSX
#elif defined EIGEN_VECTORIZE_RVV10
  Target = RVV10
#else
  Target = Generic
#endif
@ -404,6 +404,13 @@
#define EIGEN_ARCH_PPC 0
#endif

/// \internal EIGEN_ARCH_RISCV set to 1 if the architecture is RISC-V.
#if defined(__riscv)
#define EIGEN_ARCH_RISCV 1
#else
#define EIGEN_ARCH_RISCV 0
#endif

//------------------------------------------------------------------------------------------
// Operating system identification, EIGEN_OS_*
//------------------------------------------------------------------------------------------
@ -976,7 +983,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void ignore_unused_variable(cons
#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var);

#if !defined(EIGEN_ASM_COMMENT)
#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64)
#if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_RISCV)
#define EIGEN_ASM_COMMENT(X) __asm__("#" X)
#else
#define EIGEN_ASM_COMMENT(X)
@ -264,7 +264,7 @@ struct functor_cost {
  static constexpr Index Cost = plain_enum_max(nested_functor_cost<Xpr>::Cost, 1);
};

template <typename T>
template <typename T, int LMul = 1>
struct packet_traits;

template <typename T>
@ -285,9 +285,12 @@ struct find_best_packet_helper<Size, PacketType, false> {
  typedef typename find_best_packet_helper<Size, typename unpacket_traits<PacketType>::half>::type type;
};

template <typename T, int Size>
template <typename T, int Size, int LMul = 1>
struct find_best_packet;

template <typename T, int Size, int LMul>
struct find_best_packet {
  typedef typename find_best_packet_helper<Size, typename packet_traits<T>::type>::type type;
  typedef typename find_best_packet_helper<Size, typename packet_traits<T, LMul>::type>::type type;
};

template <int Size, typename PacketType,
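The new LMul parameter defaults to 1, so existing callers are unchanged; RVV10-aware call sites can ask for a wider starting packet. A small sketch follows; the expectation that packet_traits<double, 4> resolves to PacketMul4Xd on an RVV10 build is an assumption based on the names used elsewhere in this patch.

// Illustrative only: the third argument selects the LMUL used as the starting
// point of the best-packet search.
#include <Eigen/Core>
using WidePacket    = Eigen::internal::find_best_packet<double, Eigen::Dynamic, 4>::type;  // expected: PacketMul4Xd
using DefaultPacket = Eigen::internal::find_best_packet<double, Eigen::Dynamic>::type;     // LMul = 1: PacketXd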
@ -345,7 +345,7 @@ EIGEN_DEVICE_FUNC void tridiagonalization_inplace(MatrixType& matA, CoeffVectorT

    // Apply similarity transformation to remaining columns,
    // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).tail(n-i-1)
    matA.col(i).coeffRef(i + 1) = 1;
    matA.col(i).coeffRef(i + 1) = (RealScalar)1;

    hCoeffs.tail(n - i - 1).noalias() =
        (matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView<Lower>() *
@ -301,12 +301,25 @@ template <typename Scalar, typename OtherScalar, int SizeAtCompileTime, int MinA
struct apply_rotation_in_the_plane_selector<Scalar, OtherScalar, SizeAtCompileTime, MinAlignment,
                                            true /* vectorizable */> {
  static inline void run(Scalar* x, Index incrx, Scalar* y, Index incry, Index size, OtherScalar c, OtherScalar s) {
#ifdef EIGEN_RISCV64_USE_RVV10
    typedef
        typename std::conditional_t<NumTraits<Scalar>::IsComplex || NumTraits<OtherScalar>::IsComplex,
                                    typename packet_traits<Scalar, 2>::type, typename packet_traits<Scalar, 4>::type>
            Packet;
    typedef typename std::conditional_t<NumTraits<Scalar>::IsComplex || NumTraits<OtherScalar>::IsComplex,
                                        typename packet_traits<OtherScalar, 2>::type,
                                        typename packet_traits<OtherScalar, 4>::type>
        OtherPacket;

    constexpr Index PacketSize = unpacket_traits<Packet>::size;
#else
    typedef typename packet_traits<Scalar>::type Packet;
    typedef typename packet_traits<OtherScalar>::type OtherPacket;

    constexpr int RequiredAlignment =
        (std::max)(unpacket_traits<Packet>::alignment, unpacket_traits<OtherPacket>::alignment);
    constexpr Index PacketSize = packet_traits<Scalar>::size;
#endif
    constexpr int RequiredAlignment =
        (std::max<int>)(unpacket_traits<Packet>::alignment, unpacket_traits<OtherPacket>::alignment);

    /*** dynamic-size vectorized paths ***/
    if (size >= 2 * PacketSize && SizeAtCompileTime == Dynamic && ((incrx == 1 && incry == 1) || PacketSize == 1)) {
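This selector is what ultimately runs when a Givens/Jacobi rotation is applied to dynamic-size data. A short example using the standard Eigen API, shown only to connect the packet choice above to user code (which path is taken on a given target is an assumption):

#include <Eigen/Core>
#include <Eigen/Jacobi>
void zero_subdiagonal_entry(Eigen::MatrixXd& m) {
  Eigen::JacobiRotation<double> J;
  J.makeGivens(m(0, 0), m(1, 0));       // rotation that zeroes m(1, 0)
  // On an RVV10 build, the real-scalar branch above selects LMUL=4 packets.
  m.applyOnTheLeft(0, 1, J.adjoint());
}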
@ -1816,6 +1816,19 @@ EIGEN_DECLARE_TEST(packetmath) {
  CALL_SUBTEST_14((packetmath<bool, internal::packet_traits<bool>::type>()));
  CALL_SUBTEST_14((packetmath_scatter_gather<bool, internal::packet_traits<bool>::type>()));
  CALL_SUBTEST_15(test::runner<bfloat16>::run());
#ifdef EIGEN_RISCV64_USE_RVV10
  CALL_SUBTEST_16((test::runner<float, typename internal::packet_traits<float, 2>::type>::run()));
  CALL_SUBTEST_17((test::runner<float, typename internal::packet_traits<float, 4>::type>::run()));
  CALL_SUBTEST_18((test::runner<double, typename internal::packet_traits<double, 2>::type>::run()));
  CALL_SUBTEST_19((test::runner<double, typename internal::packet_traits<double, 4>::type>::run()));
  CALL_SUBTEST_20((test::runner<int32_t, typename internal::packet_traits<int32_t, 2>::type>::run()));
  CALL_SUBTEST_21((test::runner<int32_t, typename internal::packet_traits<int32_t, 4>::type>::run()));
  CALL_SUBTEST_22((test::runner<int64_t, typename internal::packet_traits<int64_t, 2>::type>::run()));
  CALL_SUBTEST_23((test::runner<int64_t, typename internal::packet_traits<int64_t, 4>::type>::run()));
  CALL_SUBTEST_24((test::runner<int16_t, typename internal::packet_traits<int16_t, 2>::type>::run()));
  CALL_SUBTEST_25((test::runner<int16_t, typename internal::packet_traits<int16_t, 4>::type>::run()));
  CALL_SUBTEST_26((test::runner<half, typename internal::packet_traits<half, 2>::type>::run()));
#endif
  g_first_pass = false;
}
}
@ -107,7 +107,11 @@ template <typename Scalar, bool Enable = internal::packet_traits<Scalar>::Vector
struct vectorization_logic {
  typedef internal::packet_traits<Scalar> PacketTraits;

#ifdef EIGEN_RISCV64_USE_RVV10
  typedef typename internal::packet_traits<Scalar, 4>::type PacketType;
#else
  typedef typename internal::packet_traits<Scalar>::type PacketType;
#endif
  typedef typename internal::unpacket_traits<PacketType>::half HalfPacketType;
  enum {
    PacketSize = internal::unpacket_traits<PacketType>::size,