From c84509d7cc5fa3e032da8cfdcd5e82b2897cc5d9 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 1 Aug 2018 12:40:54 +0100 Subject: [PATCH] Adding new arch/SYCL headers, used for SYCL vectorization. --- Eigen/src/Core/arch/SYCL/InteropHeaders.h | 104 +++++ Eigen/src/Core/arch/SYCL/MathFunctions.h | 221 +++++++++++ Eigen/src/Core/arch/SYCL/PacketMath.h | 458 ++++++++++++++++++++++ Eigen/src/Core/arch/SYCL/TypeCasting.h | 89 +++++ 4 files changed, 872 insertions(+) create mode 100644 Eigen/src/Core/arch/SYCL/InteropHeaders.h create mode 100644 Eigen/src/Core/arch/SYCL/MathFunctions.h create mode 100644 Eigen/src/Core/arch/SYCL/PacketMath.h create mode 100644 Eigen/src/Core/arch/SYCL/TypeCasting.h diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h new file mode 100644 index 000000000..c1da40d14 --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -0,0 +1,104 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * InteropHeaders.h + * + * \brief: + * InteropHeaders + * +*****************************************************************/ + +#ifndef EIGEN_INTEROP_HEADERS_SYCL_H +#define EIGEN_INTEROP_HEADERS_SYCL_H +#if defined EIGEN_USE_SYCL +namespace Eigen { + +namespace internal { +#define SYCL_PACKET_TRAITS(packet_type, val, unpacket_type, lengths)\ + template<> struct packet_traits : default_packet_traits\ + {\ + typedef packet_type type;\ + typedef packet_type half;\ + enum {\ + Vectorizable = 1,\ + AlignedOnScalar = 1,\ + size=lengths,\ + HasHalfPacket = 0,\ + HasDiv = 1,\ + HasLog = 1,\ + HasExp = 1,\ + HasSqrt = 1,\ + HasRsqrt = 1,\ + HasSin = 1,\ + HasCos = 1,\ + HasTan = 1,\ + HasASin = 1,\ + HasACos = 1,\ + HasATan = 1,\ + HasSinh = 1,\ + HasCosh = 1,\ + HasTanh = 1,\ + HasLGamma = 0,\ + HasDiGamma = 0,\ + HasZeta = 0,\ + HasPolygamma = 0,\ + HasErf = 0,\ + HasErfc = 0,\ + HasIGamma = 0,\ + HasIGammac = 0,\ + HasBetaInc = 0,\ + HasBlend = val,\ + HasMax=1,\ + HasMin=1,\ + HasMul=1,\ + HasAdd=1,\ + HasFloor=1,\ + HasRound=1,\ + HasLog1p=1,\ + HasExpm1=1,\ + HasCeil=1,\ + };\ + }; + +SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4) +SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, const float, 4) +SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2) +SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2) +#undef SYCL_PACKET_TRAITS + + +// Make sure this is only available when targeting a GPU: we don't want to +// introduce conflicts between these packet_traits definitions and the ones +// we'll use on the host side (SSE, AVX, ...) +#define SYCL_ARITHMETIC(packet_type) template<> struct is_arithmetic { enum { value = true }; }; +SYCL_ARITHMETIC(cl::sycl::cl_float4) +SYCL_ARITHMETIC(cl::sycl::cl_double2) +#undef SYCL_ARITHMETIC + +#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\ +template<> struct unpacket_traits {\ + typedef unpacket_type type;\ + enum {size=lengths, alignment=Aligned16};\ + typedef packet_type half;\ +}; +SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4) +SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2) + +#undef SYCL_UNPACKET_TRAITS + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_USE_SYCL +#endif // EIGEN_INTEROP_HEADERS_SYCL_H diff --git a/Eigen/src/Core/arch/SYCL/MathFunctions.h b/Eigen/src/Core/arch/SYCL/MathFunctions.h new file mode 100644 index 000000000..422839c6c --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/MathFunctions.h @@ -0,0 +1,221 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * MathFunctions.h + * + * \brief: + * MathFunctions + * +*****************************************************************/ + +#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H +#define EIGEN_MATH_FUNCTIONS_SYCL_H + +namespace Eigen { + +namespace internal { + +// Make sure this is only available when targeting a GPU: we don't want to +// introduce conflicts between these packet_traits definitions and the ones +// we'll use on the host side (SSE, AVX, ...) +//#if defined(__SYCL_DEVICE_ONLY__) && defined(EIGEN_USE_SYCL) +#define SYCL_PLOG(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type plog(const packet_type& a) { return cl::sycl::log(a); } + +SYCL_PLOG(cl::sycl::cl_float4) +SYCL_PLOG(cl::sycl::cl_double2) +#undef SYCL_PLOG + +#define SYCL_PLOG1P(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type plog1p(const packet_type& a) { return cl::sycl::log1p(a); } + +SYCL_PLOG1P(cl::sycl::cl_float4) +SYCL_PLOG1P(cl::sycl::cl_double2) +#undef SYCL_PLOG1P + +#define SYCL_PLOG10(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type plog10(const packet_type& a) { return cl::sycl::log10(a); } + +SYCL_PLOG10(cl::sycl::cl_float4) +SYCL_PLOG10(cl::sycl::cl_double2) +#undef SYCL_PLOG10 + +#define SYCL_PEXP(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pexp(const packet_type& a) { return cl::sycl::exp(a); } + +SYCL_PEXP(cl::sycl::cl_float4) +SYCL_PEXP(cl::sycl::cl_double2) +#undef SYCL_PEXP + +#define SYCL_PEXPM1(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pexpm1(const packet_type& a) { return cl::sycl::expm1(a); } + +SYCL_PEXPM1(cl::sycl::cl_float4) +SYCL_PEXPM1(cl::sycl::cl_double2) +#undef SYCL_PEXPM1 + +#define SYCL_PSQRT(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type psqrt(const packet_type& a) { return cl::sycl::sqrt(a); } + +SYCL_PSQRT(cl::sycl::cl_float4) +SYCL_PSQRT(cl::sycl::cl_double2) +#undef SYCL_PSQRT + + +#define SYCL_PRSQRT(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type prsqrt(const packet_type& a) { return cl::sycl::rsqrt(a); } + +SYCL_PRSQRT(cl::sycl::cl_float4) +SYCL_PRSQRT(cl::sycl::cl_double2) +#undef SYCL_PRSQRT + + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +#define SYCL_PSIN(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type psin(const packet_type& a) { return cl::sycl::sin(a); } + +SYCL_PSIN(cl::sycl::cl_float4) +SYCL_PSIN(cl::sycl::cl_double2) +#undef SYCL_PSIN + + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +#define SYCL_PCOS(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pcos(const packet_type& a) { return cl::sycl::cos(a); } + +SYCL_PCOS(cl::sycl::cl_float4) +SYCL_PCOS(cl::sycl::cl_double2) +#undef SYCL_PCOS + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +#define SYCL_PTAN(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type ptan(const packet_type& a) { return cl::sycl::tan(a); } + +SYCL_PTAN(cl::sycl::cl_float4) +SYCL_PTAN(cl::sycl::cl_double2) +#undef SYCL_PTAN + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +#define SYCL_PASIN(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pasin(const packet_type& a) { return cl::sycl::asin(a); } + +SYCL_PASIN(cl::sycl::cl_float4) +SYCL_PASIN(cl::sycl::cl_double2) +#undef SYCL_PASIN + + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +#define SYCL_PACOS(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pacos(const packet_type& a) { return cl::sycl::acos(a); } + +SYCL_PACOS(cl::sycl::cl_float4) +SYCL_PACOS(cl::sycl::cl_double2) +#undef SYCL_PACOS + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +#define SYCL_PATAN(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type patan(const packet_type& a) { return cl::sycl::atan(a); } + +SYCL_PATAN(cl::sycl::cl_float4) +SYCL_PATAN(cl::sycl::cl_double2) +#undef SYCL_PATAN + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +#define SYCL_PSINH(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type psinh(const packet_type& a) { return cl::sycl::sinh(a); } + +SYCL_PSINH(cl::sycl::cl_float4) +SYCL_PSINH(cl::sycl::cl_double2) +#undef SYCL_PSINH + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +#define SYCL_PCOSH(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pcosh(const packet_type& a) { return cl::sycl::cosh(a); } + +SYCL_PCOSH(cl::sycl::cl_float4) +SYCL_PCOSH(cl::sycl::cl_double2) +#undef SYCL_PCOSH + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +#define SYCL_PTANH(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type ptanh(const packet_type& a) { return cl::sycl::tanh(a); } + +SYCL_PTANH(cl::sycl::cl_float4) +SYCL_PTANH(cl::sycl::cl_double2) +#undef SYCL_PTANH + +#define SYCL_PCEIL(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pceil(const packet_type& a) { return cl::sycl::ceil(a); } + +SYCL_PCEIL(cl::sycl::cl_float4) +SYCL_PCEIL(cl::sycl::cl_double2) +#undef SYCL_PCEIL + + +#define SYCL_PROUND(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pround(const packet_type& a) { return cl::sycl::round(a); } + +SYCL_PROUND(cl::sycl::cl_float4) +SYCL_PROUND(cl::sycl::cl_double2) +#undef SYCL_PROUND + +#define SYCL_FLOOR(packet_type) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pfloor(const packet_type& a) { return cl::sycl::floor(a); } + +SYCL_FLOOR(cl::sycl::cl_float4) +SYCL_FLOOR(cl::sycl::cl_double2) +#undef SYCL_FLOOR + + +#define SYCL_PMIN(packet_type, expr) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pmin(const packet_type& a, const packet_type& b) { return expr; } + +SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b)) +SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b)) +#undef SYCL_PMIN + +#define SYCL_PMAX(packet_type, expr) \ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ +packet_type pmax(const packet_type& a, const packet_type& b) { return expr; } + +SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b)) +SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b)) +#undef SYCL_PMAX + +//#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATH_FUNCTIONS_CUDA_H diff --git a/Eigen/src/Core/arch/SYCL/PacketMath.h b/Eigen/src/Core/arch/SYCL/PacketMath.h new file mode 100644 index 000000000..820a83311 --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/PacketMath.h @@ -0,0 +1,458 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * PacketMath.h + * + * \brief: + * PacketMath + * +*****************************************************************/ + +#ifndef EIGEN_PACKET_MATH_SYCL_H +#define EIGEN_PACKET_MATH_SYCL_H +#include +#if defined EIGEN_USE_SYCL +namespace Eigen { + +namespace internal { + +#define SYCL_PLOADT_RO(address_space_target)\ +template\ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\ + ploadt_ro(typename cl::sycl::multi_ptr::type,\ + cl::sycl::access::address_space::address_space_target>::pointer_t from) {\ + typedef typename unpacket_traits::type scalar;\ + typedef cl::sycl::multi_ptr multi_ptr;\ + auto res=packet_type(static_cast::type>(0));\ + res.load(0, multi_ptr(const_cast(from)));\ + return res;\ +} + +SYCL_PLOADT_RO(global_space) +SYCL_PLOADT_RO(local_space) + +#undef SYCL_PLOADT_RO + + +#define SYCL_PLOAD(address_space_target, Alignment, AlignedType)\ +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\ + pload##AlignedType(typename cl::sycl::multi_ptr::type,\ + cl::sycl::access::address_space::address_space_target>::pointer_t from) {\ + return ploadt_ro(from);\ + } + +// global space +SYCL_PLOAD(global_space, Unaligned, u) +SYCL_PLOAD(global_space, Aligned, ) + +// local space +SYCL_PLOAD(local_space, Unaligned, u) +SYCL_PLOAD(local_space, Aligned, ) + +// private space +//SYCL_PLOAD(private_space, Unaligned, u) +//SYCL_PLOAD(private_space, Aligned, ) + +#undef SYCL_PLOAD + + +/** \internal \returns a packet version of \a *from. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +#define SYCL_PLOADT(address_space_target)\ +template\ +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(\ + typename cl::sycl::multi_ptr::type,\ + cl::sycl::access::address_space::address_space_target>::pointer_t from)\ +{\ + if(Alignment >= unpacket_traits::alignment)\ + return pload(from);\ + else\ + return ploadu(from);\ +} + +// global space +SYCL_PLOADT(global_space) +// local space +SYCL_PLOADT(local_space) + +//private_space +// There is no need to specialise it for private space as it can use the GenericPacketMath version + +#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)\ + template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\ + ploadt_ro(const typename unpacket_traits::type * from) { \ + typedef typename unpacket_traits::type scalar;\ + auto res=packet_type(static_cast(0));\ + res. template load(0, const_cast(from));\ + return res;\ + } + +SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned) +SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned) +SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned) +SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned) + + +#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)\ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\ + pload##alignment_type(const typename unpacket_traits::type * from) { \ + typedef typename unpacket_traits::type scalar;\ + auto res=packet_type(static_cast(0));\ + res. template load(0, const_cast(from));\ + return res;\ + } +SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4,) +SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2,) +SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u) +SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u) + +#undef SYCL_PLOAD_SPECIAL + +#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)\ +template<>\ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ + typename cl::sycl::multi_ptr::pointer_t to, \ + const packet_type& from) {\ + typedef cl::sycl::multi_ptr multi_ptr;\ + from.store(0, multi_ptr(to));\ +} + +// global space +SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, ) +SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u) +SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, ) +SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u) + +SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, ) +SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u) +SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, ) +SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u) + +SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, ) +SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u) +SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, ) +SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u) + + +#define SYCL_PSTORE_T(scalar, packet_type, Alignment)\ +template<>\ +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(\ + scalar* to,\ + const packet_type& from) {\ + if(Alignment)\ + pstore(to, from);\ + else\ + pstoreu(to,from);\ +} + + +SYCL_PSTORE_T(float, cl::sycl::cl_float4, Aligned) + +SYCL_PSTORE_T(float, cl::sycl::cl_float4, Unaligned) + +SYCL_PSTORE_T(double, cl::sycl::cl_double2, Aligned) + +SYCL_PSTORE_T(double, cl::sycl::cl_double2, Unaligned) + + +#undef SYCL_PSTORE_T + +#define SYCL_PSET1(packet_type)\ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1(\ + const typename unpacket_traits::type& from) {\ + return packet_type(from);\ +} + +// global space +SYCL_PSET1(cl::sycl::cl_float4) +SYCL_PSET1(cl::sycl::cl_double2) + +#undef SYCL_PSET1 + + +template struct get_base_packet { +template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer ) {} + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer , Index ) {} +}; + +template <> struct get_base_packet { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) { + return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) { + return cl::sycl::cl_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_float4& from, Index stride) { + auto tmp = stride; + to[0] = from.x(); + to[tmp] = from.y(); + to[tmp += stride] = from.z(); + to[tmp += stride] = from.w(); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) { + return cl::sycl::cl_float4(static_cast(a), static_cast(a+1), static_cast(a+2), static_cast(a+3)); + } +}; + +template <> struct get_base_packet { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) { + return cl::sycl::cl_double2(from[0], from[0]); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from, Index stride) { + return cl::sycl::cl_double2(from[0*stride], from[1*stride]); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_double2& from, Index stride) { + to[0] = from.x(); + to[stride] = from.y(); + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) { + return cl::sycl::cl_double2(static_cast(a), static_cast(a + 1)); + } +}; + +#define SYCL_PLOAD_DUP(address_space_target)\ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ +ploaddup(typename cl::sycl::multi_ptr::type,\ + cl::sycl::access::address_space::address_space_target>::pointer_t from)\ +{\ + return get_base_packet::get_ploaddup(from); \ +} + +// global space +SYCL_PLOAD_DUP(global_space) +// local_space +SYCL_PLOAD_DUP(local_space) +// private_space +//SYCL_PLOAD_DUP(private_space) +#undef SYCL_PLOAD_DUP + +#define SYCL_PLOAD_DUP_SPECILIZE(packet_type)\ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ +ploaddup(const typename unpacket_traits::type * from)\ +{ \ + return get_base_packet::get_ploaddup(from); \ +} + +SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4) +SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2) + +#undef SYCL_PLOAD_DUP_SPECILIZE + +#define SYCL_PLSET(packet_type)\ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset(const typename unpacket_traits::type& a) {\ + return get_base_packet::set_plset(a);\ +} + +SYCL_PLSET(cl::sycl::cl_float4) +SYCL_PLSET(cl::sycl::cl_double2) + +#undef SYCL_PLSET + + +#define SYCL_PGATHER(address_space_target)\ +template EIGEN_DEVICE_FUNC inline packet_type pgather(\ + typename cl::sycl::multi_ptr::type,\ + cl::sycl::access::address_space::address_space_target>::pointer_t from, Index stride) {\ + return get_base_packet::get_pgather(from, stride); \ +} + +// global space +SYCL_PGATHER(global_space) +// local space +SYCL_PGATHER(local_space) +// private space +//SYCL_PGATHER(private_space) + +#undef SYCL_PGATHER + + +#define SYCL_PGATHER_SPECILIZE(scalar, packet_type)\ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ +pgather(const typename unpacket_traits::type * from, Index stride)\ +{ \ + return get_base_packet::get_pgather(from, stride); \ +} + +SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4) +SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2) + +#undef SYCL_PGATHER_SPECILIZE + +#define SYCL_PSCATTER(address_space_target)\ +template EIGEN_DEVICE_FUNC inline void pscatter(\ + typename cl::sycl::multi_ptr::type,\ + cl::sycl::access::address_space::address_space_target>::pointer_t to,\ + const packet_type& from, Index stride) {\ + get_base_packet::set_pscatter(to, from, stride);\ +} + +// global space +SYCL_PSCATTER(global_space) +// local space +SYCL_PSCATTER(local_space) +// private space +//SYCL_PSCATTER(private_space) + +#undef SYCL_PSCATTER + + + +#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)\ +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void \ +pscatter(typename unpacket_traits::type * to, const packet_type& from, Index stride)\ +{ \ + get_base_packet::set_pscatter(to, from, stride);\ +} + +SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4) +SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2) + +#undef SYCL_PSCATTER_SPECILIZE + + +#define SYCL_PMAD(packet_type)\ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( const packet_type& a,\ + const packet_type& b, const packet_type& c){\ + return cl::sycl::mad(a,b,c);\ +} + +SYCL_PMAD(cl::sycl::cl_float4) +SYCL_PMAD(cl::sycl::cl_double2) +#undef SYCL_PMAD + + + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst(const cl::sycl::cl_float4& a) { + return a.x(); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst(const cl::sycl::cl_double2& a) { + return a.x(); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux(const cl::sycl::cl_float4& a) { + return a.x() + a.y() + a.z() + a.w(); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux(const cl::sycl::cl_double2& a) { + return a.x() + a.y(); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max(const cl::sycl::cl_float4& a) { + return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w())); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max(const cl::sycl::cl_double2& a) { + return cl::sycl::fmax(a.x(), a.y()); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min(const cl::sycl::cl_float4& a) { + return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w())); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min(const cl::sycl::cl_double2& a) { + return cl::sycl::fmin(a.x(), a.y()); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul(const cl::sycl::cl_float4& a) { + return a.x() * a.y() * a.z() * a.w(); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul(const cl::sycl::cl_double2& a) { + return a.x() * a.y(); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs(const cl::sycl::cl_float4& a) { + return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w())); +} +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs(const cl::sycl::cl_double2& a) { + return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y())); +} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void +ptranspose(PacketBlock& kernel) { + float tmp = kernel.packet[0].y(); + kernel.packet[0].y() = kernel.packet[1].x(); + kernel.packet[1].x() = tmp; +// std::swap(kernel.packet[0].y(), kernel.packet[1].x()); + + tmp = kernel.packet[0].z(); + kernel.packet[0].z() = kernel.packet[2].x(); + kernel.packet[2].x() = tmp; + //std::swap(kernel.packet[0].z(), kernel.packet[2].x()); + + tmp = kernel.packet[0].w(); + kernel.packet[0].w() = kernel.packet[3].x(); + kernel.packet[3].x() = tmp; + + //std::swap(kernel.packet[0].w(), kernel.packet[3].x()); + + tmp = kernel.packet[1].z(); + kernel.packet[1].z() = kernel.packet[2].y(); + kernel.packet[2].y() = tmp; +// std::swap(kernel.packet[1].z(), kernel.packet[2].y()); + + tmp = kernel.packet[1].w(); + kernel.packet[1].w() = kernel.packet[3].y(); + kernel.packet[3].y() = tmp; +// std::swap(kernel.packet[1].w(), kernel.packet[3].y()); + + tmp = kernel.packet[2].w(); + kernel.packet[2].w() = kernel.packet[3].z(); + kernel.packet[3].z() = tmp; +// std::swap(kernel.packet[2].w(), kernel.packet[3].z()); + +} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void +ptranspose(PacketBlock& kernel) { + double tmp = kernel.packet[0].y(); + kernel.packet[0].y() = kernel.packet[1].x(); + kernel.packet[1].x() = tmp; +//std::swap(kernel.packet[0].y(), kernel.packet[1].x()); +} + + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 +pblend(const Selector::size>& ifPacket, + const cl::sycl::cl_float4& thenPacket, const cl::sycl::cl_float4& elsePacket) { + cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, + ifPacket.select[1] ? 0 : -1, + ifPacket.select[2] ? 0 : -1, + ifPacket.select[3] ? 0 : -1); + return cl::sycl::select(thenPacket, elsePacket, condition); +} + +template<> inline cl::sycl::cl_double2 +pblend(const Selector::size>& ifPacket, + const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) { + cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, + ifPacket.select[1] ? 0 : -1); + return cl::sycl::select(thenPacket, elsePacket, condition); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_USE_SYCL +#endif // EIGEN_PACKET_MATH_SYCL_H diff --git a/Eigen/src/Core/arch/SYCL/TypeCasting.h b/Eigen/src/Core/arch/SYCL/TypeCasting.h new file mode 100644 index 000000000..dedd5c84a --- /dev/null +++ b/Eigen/src/Core/arch/SYCL/TypeCasting.h @@ -0,0 +1,89 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TypeCasting.h + * + * \brief: + * TypeCasting + * +*****************************************************************/ + +#ifndef EIGEN_TYPE_CASTING_SYCL_H +#define EIGEN_TYPE_CASTING_SYCL_H + +namespace Eigen { + +namespace internal { +#ifdef __SYCL_DEVICE_ONLY__ +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 pcast(const cl::sycl::cl_float4& a) { + return a. template convert(); +} + + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast(const cl::sycl::cl_int4& a) { + return a. template convert(); +} + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 2, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast(const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) { + auto a1=a. template convert(); + auto b1=b. template convert(); + return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y()); +} + +template <> +struct type_casting_traits { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 2 + }; +}; + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pcast(const cl::sycl::cl_float4& a) { + // Simply discard the second half of the input + return cl::sycl::cl_double2(a.x(), a.y()); +} + +#endif +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_SYCL_H