mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-13 12:19:12 +08:00
Cleaned up the fp16 code
This commit is contained in:
parent
40a16282c7
commit
ff4a289572
@ -849,7 +849,7 @@ EIGEN_DEVICE_FUNC
|
|||||||
inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
|
inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
|
||||||
{
|
{
|
||||||
return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
|
return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
|
@ -10,22 +10,24 @@
|
|||||||
#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
|
#ifndef EIGEN_PACKET_MATH_HALF_CUDA_H
|
||||||
#define EIGEN_PACKET_MATH_HALF_CUDA_H
|
#define EIGEN_PACKET_MATH_HALF_CUDA_H
|
||||||
|
|
||||||
#if defined(EIGEN_HAS_CUDA_FP16)
|
//#if defined(EIGEN_HAS_CUDA_FP16)
|
||||||
|
|
||||||
// Make sure this is only available when targeting a GPU: we don't want to
|
// Make sure this is only available when targeting a GPU: we don't want to
|
||||||
// introduce conflicts between these packet_traits definitions and the ones
|
// introduce conflicts between these packet_traits definitions and the ones
|
||||||
// we'll use on the host side (SSE, AVX, ...)
|
// we'll use on the host side (SSE, AVX, ...)
|
||||||
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
//#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
||||||
|
|
||||||
|
|
||||||
// Most of the following operations require arch >= 3.0
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
// Most of the following operations require arch >= 3.0
|
||||||
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||||
|
|
||||||
template<> struct is_arithmetic<half2> { enum { value = true }; };
|
template<> struct is_arithmetic<half2> { enum { value = true }; };
|
||||||
|
|
||||||
template<> struct packet_traits<half> : default_packet_traits
|
template<> struct packet_traits<Eigen::half> : default_packet_traits
|
||||||
{
|
{
|
||||||
typedef half2 type;
|
typedef half2 type;
|
||||||
typedef half2 half;
|
typedef half2 half;
|
||||||
@ -43,35 +45,35 @@ template<> struct packet_traits<half> : default_packet_traits
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template<> struct unpacket_traits<half2> { typedef half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
|
template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const half& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
|
||||||
return __half2half2(from);
|
return __half2half2(from);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const half* from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
|
||||||
return *reinterpret_cast<const half2*>(from);
|
return *reinterpret_cast<const half2*>(from);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const half* from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu<half2>(const Eigen::half* from) {
|
||||||
return __halves2half2(from[0], from[1]);
|
return __halves2half2(from[0], from[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const half* from) {
|
template<> EIGEN_STRONG_INLINE half2 ploaddup<half2>(const Eigen::half* from) {
|
||||||
return __halves2half2(from[0], from[0]);
|
return __halves2half2(from[0], from[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<half>(half* to, const half2& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const half2& from) {
|
||||||
*reinterpret_cast<half2*>(to) = from;
|
*reinterpret_cast<half2*>(to) = from;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, const half2& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const half2& from) {
|
||||||
to[0] = __low2half(from);
|
to[0] = __low2half(from);
|
||||||
to[1] = __high2half(from);
|
to[1] = __high2half(from);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) {
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const Eigen::half* from) {
|
||||||
#if __CUDA_ARCH__ >= 350
|
#if __CUDA_ARCH__ >= 350
|
||||||
return __ldg((const half2*)from);
|
return __ldg((const half2*)from);
|
||||||
#else
|
#else
|
||||||
@ -80,7 +82,7 @@ template<>
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) {
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Eigen::half* from) {
|
||||||
#if __CUDA_ARCH__ >= 350
|
#if __CUDA_ARCH__ >= 350
|
||||||
return __halves2half2(__ldg(from+0), __ldg(from+1));
|
return __halves2half2(__ldg(from+0), __ldg(from+1));
|
||||||
#else
|
#else
|
||||||
@ -88,16 +90,16 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const ha
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) {
|
template<> EIGEN_DEVICE_FUNC inline half2 pgather<Eigen::half, half2>(const Eigen::half* from, Index stride) {
|
||||||
return __halves2half2(from[0*stride], from[1*stride]);
|
return __halves2half2(from[0*stride], from[1*stride]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<half, half2>(half* to, const half2& from, Index stride) {
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, half2>(Eigen::half* to, const half2& from, Index stride) {
|
||||||
to[stride*0] = __low2half(from);
|
to[stride*0] = __low2half(from);
|
||||||
to[stride*1] = __high2half(from);
|
to[stride*1] = __high2half(from);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline half pfirst<half2>(const half2& a) {
|
template<> EIGEN_DEVICE_FUNC inline Eigen::half pfirst<half2>(const half2& a) {
|
||||||
return __low2half(a);
|
return __low2half(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,15 +112,15 @@ template<> EIGEN_DEVICE_FUNC inline half2 pabs<half2>(const half2& a) {
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline void
|
EIGEN_DEVICE_FUNC inline void
|
||||||
ptranspose(PacketBlock<half2,2>& kernel) {
|
ptranspose(PacketBlock<half2,2>& kernel) {
|
||||||
half a1 = __low2half(kernel.packet[0]);
|
__half a1 = __low2half(kernel.packet[0]);
|
||||||
half a2 = __high2half(kernel.packet[0]);
|
__half a2 = __high2half(kernel.packet[0]);
|
||||||
half b1 = __low2half(kernel.packet[1]);
|
__half b1 = __low2half(kernel.packet[1]);
|
||||||
half b2 = __high2half(kernel.packet[1]);
|
__half b2 = __high2half(kernel.packet[1]);
|
||||||
kernel.packet[0] = __halves2half2(a1, b1);
|
kernel.packet[0] = __halves2half2(a1, b1);
|
||||||
kernel.packet[1] = __halves2half2(a2, b2);
|
kernel.packet[1] = __halves2half2(a2, b2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const Eigen::half& a) {
|
||||||
#if __CUDA_ARCH__ >= 530
|
#if __CUDA_ARCH__ >= 530
|
||||||
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
|
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
|
||||||
#else
|
#else
|
||||||
@ -227,17 +229,17 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2&
|
|||||||
return __halves2half2(r1, r2);
|
return __halves2half2(r1, r2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) {
|
template<> EIGEN_DEVICE_FUNC inline Eigen::half predux<half2>(const half2& a) {
|
||||||
#if __CUDA_ARCH__ >= 530
|
#if __CUDA_ARCH__ >= 530
|
||||||
return __hadd(__low2half(a), __high2half(a));
|
return __hadd(__low2half(a), __high2half(a));
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
float a2 = __high2float(a);
|
float a2 = __high2float(a);
|
||||||
return half(internal::raw_uint16_to_half(__float2half_rn(a1 + a2)));
|
return Eigen::half(internal::raw_uint16_to_half(__float2half_rn(a1 + a2)));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
|
template<> EIGEN_DEVICE_FUNC inline Eigen::half predux_max<half2>(const half2& a) {
|
||||||
#if __CUDA_ARCH__ >= 530
|
#if __CUDA_ARCH__ >= 530
|
||||||
__half first = __low2half(a);
|
__half first = __low2half(a);
|
||||||
__half second = __high2half(a);
|
__half second = __high2half(a);
|
||||||
@ -249,7 +251,7 @@ template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
|
template<> EIGEN_DEVICE_FUNC inline Eigen::half predux_min<half2>(const half2& a) {
|
||||||
#if __CUDA_ARCH__ >= 530
|
#if __CUDA_ARCH__ >= 530
|
||||||
__half first = __low2half(a);
|
__half first = __low2half(a);
|
||||||
__half second = __high2half(a);
|
__half second = __high2half(a);
|
||||||
@ -261,13 +263,13 @@ template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
|
template<> EIGEN_DEVICE_FUNC inline Eigen::half predux_mul<half2>(const half2& a) {
|
||||||
#if __CUDA_ARCH__ >= 530
|
#if __CUDA_ARCH__ >= 530
|
||||||
return __hmul(__low2half(a), __high2half(a));
|
return __hmul(__low2half(a), __high2half(a));
|
||||||
#else
|
#else
|
||||||
float a1 = __low2float(a);
|
float a1 = __low2float(a);
|
||||||
float a2 = __high2float(a);
|
float a2 = __high2float(a);
|
||||||
return half(internal::raw_uint16_to_half(__float2half_rn(a1 * a2)));
|
return Eigen::half(internal::raw_uint16_to_half(__float2half_rn(a1 * a2)));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -303,20 +305,7 @@ template<> EIGEN_DEVICE_FUNC inline half2 prsqrt<half2>(const half2& a) {
|
|||||||
return __floats2half2_rn(r1, r2);
|
return __floats2half2_rn(r1, r2);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
#elif defined EIGEN_VECTORIZE_AVX
|
||||||
|
|
||||||
} // end namespace Eigen
|
|
||||||
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else // EIGEN_HAS_CUDA_FP16
|
|
||||||
|
|
||||||
|
|
||||||
namespace Eigen {
|
|
||||||
namespace internal {
|
|
||||||
|
|
||||||
#if defined EIGEN_VECTORIZE_AVX
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
__m128i x;
|
__m128i x;
|
||||||
@ -326,7 +315,7 @@ typedef struct {
|
|||||||
template<> struct is_arithmetic<Packet8h> { enum { value = true }; };
|
template<> struct is_arithmetic<Packet8h> { enum { value = true }; };
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct packet_traits<half> : default_packet_traits {
|
struct packet_traits<Eigen::half> : default_packet_traits {
|
||||||
typedef Packet8h type;
|
typedef Packet8h type;
|
||||||
// There is no half-size packet for Packet8h.
|
// There is no half-size packet for Packet8h.
|
||||||
typedef Packet8h half;
|
typedef Packet8h half;
|
||||||
@ -357,38 +346,38 @@ struct packet_traits<half> : default_packet_traits {
|
|||||||
|
|
||||||
template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16}; typedef Packet8h half; };
|
template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16}; typedef Packet8h half; };
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const half& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) {
|
||||||
Packet8h result;
|
Packet8h result;
|
||||||
result.x = _mm_set1_epi16(from.x);
|
result.x = _mm_set1_epi16(from.x);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half pfirst<Packet8h>(const Packet8h& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<Packet8h>(const Packet8h& from) {
|
||||||
return raw_uint16_to_half(static_cast<unsigned short>(_mm_extract_epi16(from.x, 0)));
|
return raw_uint16_to_half(static_cast<unsigned short>(_mm_extract_epi16(from.x, 0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h pload<Packet8h>(const half* from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h pload<Packet8h>(const Eigen::half* from) {
|
||||||
Packet8h result;
|
Packet8h result;
|
||||||
result.x = _mm_load_si128(reinterpret_cast<const __m128i*>(from));
|
result.x = _mm_load_si128(reinterpret_cast<const __m128i*>(from));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h ploadu<Packet8h>(const half* from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h ploadu<Packet8h>(const Eigen::half* from) {
|
||||||
Packet8h result;
|
Packet8h result;
|
||||||
result.x = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
|
result.x = _mm_loadu_si128(reinterpret_cast<const __m128i*>(from));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<half>(half* to, const Packet8h& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet8h& from) {
|
||||||
_mm_store_si128((__m128i*)to, from.x);
|
_mm_store_si128((__m128i*)to, from.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, const Packet8h& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet8h& from) {
|
||||||
_mm_storeu_si128((__m128i*)to, from.x);
|
_mm_storeu_si128((__m128i*)to, from.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline Packet8h
|
template<> EIGEN_DEVICE_FUNC inline Packet8h
|
||||||
ploadquad(const half* from) {
|
ploadquad<Packet8h>(const Eigen::half* from) {
|
||||||
Packet8h result;
|
Packet8h result;
|
||||||
unsigned short a = from[0].x;
|
unsigned short a = from[0].x;
|
||||||
unsigned short b = from[1].x;
|
unsigned short b = from[1].x;
|
||||||
@ -400,7 +389,7 @@ EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h& a) {
|
|||||||
#ifdef EIGEN_HAS_FP16_C
|
#ifdef EIGEN_HAS_FP16_C
|
||||||
return _mm256_cvtph_ps(a.x);
|
return _mm256_cvtph_ps(a.x);
|
||||||
#else
|
#else
|
||||||
EIGEN_ALIGN32 half aux[8];
|
EIGEN_ALIGN32 Eigen::half aux[8];
|
||||||
pstore(aux, a);
|
pstore(aux, a);
|
||||||
float f0(aux[0]);
|
float f0(aux[0]);
|
||||||
float f1(aux[1]);
|
float f1(aux[1]);
|
||||||
@ -423,14 +412,14 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
|
|||||||
#else
|
#else
|
||||||
EIGEN_ALIGN32 float aux[8];
|
EIGEN_ALIGN32 float aux[8];
|
||||||
pstore(aux, a);
|
pstore(aux, a);
|
||||||
half h0(aux[0]);
|
Eigen::half h0(aux[0]);
|
||||||
half h1(aux[1]);
|
Eigen::half h1(aux[1]);
|
||||||
half h2(aux[2]);
|
Eigen::half h2(aux[2]);
|
||||||
half h3(aux[3]);
|
Eigen::half h3(aux[3]);
|
||||||
half h4(aux[4]);
|
Eigen::half h4(aux[4]);
|
||||||
half h5(aux[5]);
|
Eigen::half h5(aux[5]);
|
||||||
half h6(aux[6]);
|
Eigen::half h6(aux[6]);
|
||||||
half h7(aux[7]);
|
Eigen::half h7(aux[7]);
|
||||||
|
|
||||||
Packet8h result;
|
Packet8h result;
|
||||||
result.x = _mm_set_epi16(h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x);
|
result.x = _mm_set_epi16(h7.x, h6.x, h5.x, h4.x, h3.x, h2.x, h1.x, h0.x);
|
||||||
@ -438,6 +427,8 @@ EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f& a) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h pconj(const Packet8h& a) { return a; }
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h padd<Packet8h>(const Packet8h& a, const Packet8h& b) {
|
||||||
Packet8f af = half2float(a);
|
Packet8f af = half2float(a);
|
||||||
Packet8f bf = half2float(b);
|
Packet8f bf = half2float(b);
|
||||||
@ -452,16 +443,16 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet8h pmul<Packet8h>(const P
|
|||||||
return float2half(rf);
|
return float2half(rf);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline Packet8h pgather<half, Packet8h>(const half* from, Index stride)
|
template<> EIGEN_DEVICE_FUNC inline Packet8h pgather<Eigen::half, Packet8h>(const Eigen::half* from, Index stride)
|
||||||
{
|
{
|
||||||
Packet8h result;
|
Packet8h result;
|
||||||
result.x = _mm_set_epi16(from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x, from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
|
result.x = _mm_set_epi16(from[7*stride].x, from[6*stride].x, from[5*stride].x, from[4*stride].x, from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<half, Packet8h>(half* to, const Packet8h& from, Index stride)
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, Packet8h>(Eigen::half* to, const Packet8h& from, Index stride)
|
||||||
{
|
{
|
||||||
EIGEN_ALIGN32 half aux[8];
|
EIGEN_ALIGN32 Eigen::half aux[8];
|
||||||
pstore(aux, from);
|
pstore(aux, from);
|
||||||
to[stride*0].x = aux[0].x;
|
to[stride*0].x = aux[0].x;
|
||||||
to[stride*1].x = aux[1].x;
|
to[stride*1].x = aux[1].x;
|
||||||
@ -523,13 +514,13 @@ ptranspose(PacketBlock<Packet8h,8>& kernel) {
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC inline void
|
EIGEN_DEVICE_FUNC inline void
|
||||||
ptranspose(PacketBlock<Packet8h,4>& kernel) {
|
ptranspose(PacketBlock<Packet8h,4>& kernel) {
|
||||||
EIGEN_ALIGN32 half in[4][8];
|
EIGEN_ALIGN32 Eigen::half in[4][8];
|
||||||
pstore<half>(in[0], kernel.packet[0]);
|
pstore<Eigen::half>(in[0], kernel.packet[0]);
|
||||||
pstore<half>(in[1], kernel.packet[1]);
|
pstore<Eigen::half>(in[1], kernel.packet[1]);
|
||||||
pstore<half>(in[2], kernel.packet[2]);
|
pstore<Eigen::half>(in[2], kernel.packet[2]);
|
||||||
pstore<half>(in[3], kernel.packet[3]);
|
pstore<Eigen::half>(in[3], kernel.packet[3]);
|
||||||
|
|
||||||
EIGEN_ALIGN32 half out[4][8];
|
EIGEN_ALIGN32 Eigen::half out[4][8];
|
||||||
|
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
for (int j = 0; j < 4; ++j) {
|
for (int j = 0; j < 4; ++j) {
|
||||||
@ -557,7 +548,7 @@ typedef struct {
|
|||||||
template<> struct is_arithmetic<Packet4h> { enum { value = true }; };
|
template<> struct is_arithmetic<Packet4h> { enum { value = true }; };
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct packet_traits<half> : default_packet_traits {
|
struct packet_traits<Eigen::half> : default_packet_traits {
|
||||||
typedef Packet4h type;
|
typedef Packet4h type;
|
||||||
// There is no half-size packet for Packet8h.
|
// There is no half-size packet for Packet8h.
|
||||||
typedef Packet4h half;
|
typedef Packet4h half;
|
||||||
@ -588,24 +579,26 @@ struct packet_traits<half> : default_packet_traits {
|
|||||||
|
|
||||||
template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16}; typedef Packet4h half; };
|
template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16}; typedef Packet4h half; };
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const half& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) {
|
||||||
Packet4h result;
|
Packet4h result;
|
||||||
result.x = _mm_set1_pi16(from.x);
|
result.x = _mm_set1_pi16(from.x);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half pfirst<Packet4h>(const Packet4h& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h>(const Packet4h& from) {
|
||||||
return raw_uint16_to_half(static_cast<unsigned short>(_mm_cvtsi64_si32(from.x)));
|
return raw_uint16_to_half(static_cast<unsigned short>(_mm_cvtsi64_si32(from.x)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h pconj(const Packet4h& a) { return a; }
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h padd<Packet4h>(const Packet4h& a, const Packet4h& b) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h padd<Packet4h>(const Packet4h& a, const Packet4h& b) {
|
||||||
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
||||||
__int64_t b64 = _mm_cvtm64_si64(b.x);
|
__int64_t b64 = _mm_cvtm64_si64(b.x);
|
||||||
|
|
||||||
half h[4];
|
Eigen::half h[4];
|
||||||
|
|
||||||
half ha = raw_uint16_to_half(static_cast<unsigned short>(a64));
|
Eigen::half ha = raw_uint16_to_half(static_cast<unsigned short>(a64));
|
||||||
half hb = raw_uint16_to_half(static_cast<unsigned short>(b64));
|
Eigen::half hb = raw_uint16_to_half(static_cast<unsigned short>(b64));
|
||||||
h[0] = ha + hb;
|
h[0] = ha + hb;
|
||||||
ha = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
ha = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
||||||
hb = raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
|
hb = raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
|
||||||
@ -625,10 +618,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h pmul<Packet4h>(const P
|
|||||||
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
||||||
__int64_t b64 = _mm_cvtm64_si64(b.x);
|
__int64_t b64 = _mm_cvtm64_si64(b.x);
|
||||||
|
|
||||||
half h[4];
|
Eigen::half h[4];
|
||||||
|
|
||||||
half ha = raw_uint16_to_half(static_cast<unsigned short>(a64));
|
Eigen::half ha = raw_uint16_to_half(static_cast<unsigned short>(a64));
|
||||||
half hb = raw_uint16_to_half(static_cast<unsigned short>(b64));
|
Eigen::half hb = raw_uint16_to_half(static_cast<unsigned short>(b64));
|
||||||
h[0] = ha * hb;
|
h[0] = ha * hb;
|
||||||
ha = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
ha = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
||||||
hb = raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
|
hb = raw_uint16_to_half(static_cast<unsigned short>(b64 >> 16));
|
||||||
@ -644,36 +637,41 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h pmul<Packet4h>(const P
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h pload<Packet4h>(const half* from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h pload<Packet4h>(const Eigen::half* from) {
|
||||||
Packet4h result;
|
Packet4h result;
|
||||||
result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
|
result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h ploadu<Packet4h>(const half* from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h ploadu<Packet4h>(const Eigen::half* from) {
|
||||||
Packet4h result;
|
Packet4h result;
|
||||||
result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
|
result.x = _mm_cvtsi64_m64(*reinterpret_cast<const __int64_t*>(from));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<half>(half* to, const Packet4h& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet4h& from) {
|
||||||
__int64_t r = _mm_cvtm64_si64(from.x);
|
__int64_t r = _mm_cvtm64_si64(from.x);
|
||||||
*(reinterpret_cast<__int64_t*>(to)) = r;
|
*(reinterpret_cast<__int64_t*>(to)) = r;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, const Packet4h& from) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet4h& from) {
|
||||||
__int64_t r = _mm_cvtm64_si64(from.x);
|
__int64_t r = _mm_cvtm64_si64(from.x);
|
||||||
*(reinterpret_cast<__int64_t*>(to)) = r;
|
*(reinterpret_cast<__int64_t*>(to)) = r;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline Packet4h pgather<half, Packet4h>(const half* from, Index stride)
|
template<> EIGEN_DEVICE_FUNC inline Packet4h
|
||||||
|
ploadquad<Packet4h>(const Eigen::half* from) {
|
||||||
|
return pset1<Packet4h>(*from);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC inline Packet4h pgather<Eigen::half, Packet4h>(const Eigen::half* from, Index stride)
|
||||||
{
|
{
|
||||||
Packet4h result;
|
Packet4h result;
|
||||||
result.x = _mm_set_pi16(from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
|
result.x = _mm_set_pi16(from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline void pscatter<half, Packet4h>(half* to, const Packet4h& from, Index stride)
|
template<> EIGEN_DEVICE_FUNC inline void pscatter<Eigen::half, Packet4h>(Eigen::half* to, const Packet4h& from, Index stride)
|
||||||
{
|
{
|
||||||
__int64_t a = _mm_cvtm64_si64(from.x);
|
__int64_t a = _mm_cvtm64_si64(from.x);
|
||||||
to[stride*0].x = static_cast<unsigned short>(a);
|
to[stride*0].x = static_cast<unsigned short>(a);
|
||||||
@ -696,9 +694,8 @@ ptranspose(PacketBlock<Packet4h,4>& kernel) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // EIGEN_HAS_CUDA_FP16
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // EIGEN_PACKET_MATH_HALF_CUDA_H
|
#endif // EIGEN_PACKET_MATH_HALF_CUDA_H
|
||||||
|
@ -14,49 +14,47 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
#if defined(EIGEN_HAS_CUDA_FP16)
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct scalar_cast_op<float, half> {
|
struct scalar_cast_op<float, Eigen::half> {
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||||
typedef half result_type;
|
typedef Eigen::half result_type;
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const float& a) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const {
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||||
return __float2half(a);
|
return __float2half(a);
|
||||||
#else
|
#else
|
||||||
return half(a);
|
return Eigen::half(a);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct functor_traits<scalar_cast_op<float, half> >
|
struct functor_traits<scalar_cast_op<float, Eigen::half> >
|
||||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||||
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct scalar_cast_op<int, half> {
|
struct scalar_cast_op<int, Eigen::half> {
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||||
typedef half result_type;
|
typedef Eigen::half result_type;
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half operator() (const int& a) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const {
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||||
return __float2half(static_cast<float>(a));
|
return __float2half(static_cast<float>(a));
|
||||||
#else
|
#else
|
||||||
return half(static_cast<float>(a));
|
return Eigen::half(static_cast<float>(a));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct functor_traits<scalar_cast_op<int, half> >
|
struct functor_traits<scalar_cast_op<int, Eigen::half> >
|
||||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||||
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct scalar_cast_op<half, float> {
|
struct scalar_cast_op<Eigen::half, float> {
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
||||||
typedef float result_type;
|
typedef float result_type;
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const half& a) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const {
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||||
return __half2float(a);
|
return __half2float(a);
|
||||||
#else
|
#else
|
||||||
@ -66,7 +64,7 @@ struct scalar_cast_op<half, float> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct functor_traits<scalar_cast_op<half, float> >
|
struct functor_traits<scalar_cast_op<Eigen::half, float> >
|
||||||
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
{ enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; };
|
||||||
|
|
||||||
|
|
||||||
@ -74,7 +72,7 @@ struct functor_traits<scalar_cast_op<half, float> >
|
|||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<half, float> {
|
struct type_casting_traits<Eigen::half, float> {
|
||||||
enum {
|
enum {
|
||||||
VectorizedCast = 1,
|
VectorizedCast = 1,
|
||||||
SrcCoeffRatio = 2,
|
SrcCoeffRatio = 2,
|
||||||
@ -89,7 +87,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(con
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<float, half> {
|
struct type_casting_traits<float, Eigen::half> {
|
||||||
enum {
|
enum {
|
||||||
VectorizedCast = 1,
|
VectorizedCast = 1,
|
||||||
SrcCoeffRatio = 1,
|
SrcCoeffRatio = 1,
|
||||||
@ -99,16 +97,13 @@ struct type_casting_traits<float, half> {
|
|||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
|
||||||
// Simply discard the second half of the input
|
// Simply discard the second half of the input
|
||||||
return __float22half2_rn(make_float2(a.x, a.y));
|
return __floats2half2_rn(a.x, a.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#elif defined EIGEN_VECTORIZE_AVX
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#ifdef EIGEN_VECTORIZE_AVX
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<half, float> {
|
struct type_casting_traits<Eigen::half, float> {
|
||||||
enum {
|
enum {
|
||||||
VectorizedCast = 1,
|
VectorizedCast = 1,
|
||||||
SrcCoeffRatio = 1,
|
SrcCoeffRatio = 1,
|
||||||
@ -121,7 +116,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<float, half> {
|
struct type_casting_traits<float, Eigen::half> {
|
||||||
enum {
|
enum {
|
||||||
VectorizedCast = 1,
|
VectorizedCast = 1,
|
||||||
SrcCoeffRatio = 1,
|
SrcCoeffRatio = 1,
|
||||||
@ -134,8 +129,9 @@ template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f
|
|||||||
}
|
}
|
||||||
|
|
||||||
#elif defined EIGEN_VECTORIZE_SSE && !EIGEN_COMP_MSVC
|
#elif defined EIGEN_VECTORIZE_SSE && !EIGEN_COMP_MSVC
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<half, float> {
|
struct type_casting_traits<Eigen::half, float> {
|
||||||
enum {
|
enum {
|
||||||
VectorizedCast = 1,
|
VectorizedCast = 1,
|
||||||
SrcCoeffRatio = 1,
|
SrcCoeffRatio = 1,
|
||||||
@ -145,7 +141,7 @@ struct type_casting_traits<half, float> {
|
|||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
|
||||||
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
__int64_t a64 = _mm_cvtm64_si64(a.x);
|
||||||
half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
|
Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
|
||||||
float f1 = static_cast<float>(h);
|
float f1 = static_cast<float>(h);
|
||||||
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
|
||||||
float f2 = static_cast<float>(h);
|
float f2 = static_cast<float>(h);
|
||||||
@ -157,7 +153,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<float, half> {
|
struct type_casting_traits<float, Eigen::half> {
|
||||||
enum {
|
enum {
|
||||||
VectorizedCast = 1,
|
VectorizedCast = 1,
|
||||||
SrcCoeffRatio = 1,
|
SrcCoeffRatio = 1,
|
||||||
@ -168,17 +164,16 @@ struct type_casting_traits<float, half> {
|
|||||||
template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
|
template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
|
||||||
EIGEN_ALIGN16 float aux[4];
|
EIGEN_ALIGN16 float aux[4];
|
||||||
pstore(aux, a);
|
pstore(aux, a);
|
||||||
half h0(aux[0]);
|
Eigen::half h0(aux[0]);
|
||||||
half h1(aux[1]);
|
Eigen::half h1(aux[1]);
|
||||||
half h2(aux[2]);
|
Eigen::half h2(aux[2]);
|
||||||
half h3(aux[3]);
|
Eigen::half h3(aux[3]);
|
||||||
|
|
||||||
Packet4h result;
|
Packet4h result;
|
||||||
result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
|
result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
Loading…
x
Reference in New Issue
Block a user