mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
[SYCL] This PR adds the minimum modifications to Eigen core required to run Eigen unsupported modules on devices supporting SYCL.
* Adding SYCL memory model * Enabling/Disabling SYCL backend in Core * Supporting Vectorization
This commit is contained in:
parent
adec097c61
commit
16a56b2ddd
17
Eigen/Core
17
Eigen/Core
@ -101,13 +101,23 @@
|
|||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(EIGEN_USE_SYCL)
|
||||||
#undef min
|
#undef min
|
||||||
#undef max
|
#undef max
|
||||||
#undef isnan
|
#undef isnan
|
||||||
#undef isinf
|
#undef isinf
|
||||||
#undef isfinite
|
#undef isfinite
|
||||||
#include <SYCL/sycl.hpp>
|
#include <SYCL/sycl.hpp>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
#include <thread>
|
||||||
|
#ifndef EIGEN_SYCL_LOCAL_THREAD_DIM0
|
||||||
|
#define EIGEN_SYCL_LOCAL_THREAD_DIM0 16
|
||||||
|
#endif
|
||||||
|
#ifndef EIGEN_SYCL_LOCAL_THREAD_DIM1
|
||||||
|
#define EIGEN_SYCL_LOCAL_THREAD_DIM1 16
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -207,12 +217,15 @@ using std::ptrdiff_t;
|
|||||||
#include "src/Core/arch/GPU/MathFunctions.h"
|
#include "src/Core/arch/GPU/MathFunctions.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined EIGEN_VECTORIZE_SYCL
|
#if defined(EIGEN_USE_SYCL)
|
||||||
|
#include "src/Core/arch/SYCL/SyclMemoryModel.h"
|
||||||
#include "src/Core/arch/SYCL/InteropHeaders.h"
|
#include "src/Core/arch/SYCL/InteropHeaders.h"
|
||||||
|
#if !defined(EIGEN_DONT_VECTORIZE_SYCL)
|
||||||
#include "src/Core/arch/SYCL/PacketMath.h"
|
#include "src/Core/arch/SYCL/PacketMath.h"
|
||||||
#include "src/Core/arch/SYCL/MathFunctions.h"
|
#include "src/Core/arch/SYCL/MathFunctions.h"
|
||||||
#include "src/Core/arch/SYCL/TypeCasting.h"
|
#include "src/Core/arch/SYCL/TypeCasting.h"
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
#include "src/Core/arch/Default/Settings.h"
|
#include "src/Core/arch/Default/Settings.h"
|
||||||
|
|
||||||
#include "src/Core/functors/TernaryFunctors.h"
|
#include "src/Core/functors/TernaryFunctors.h"
|
||||||
|
@ -954,7 +954,7 @@ EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
|
|
||||||
|
|
||||||
#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \
|
#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \
|
||||||
@ -991,7 +991,7 @@ EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
|
|||||||
SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC,cl::sycl::cl_double)
|
SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC,cl::sycl::cl_double)
|
||||||
#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(NAME, FUNC, RET_TYPE) \
|
#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(NAME, FUNC, RET_TYPE) \
|
||||||
SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_float) \
|
SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_float) \
|
||||||
SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double)
|
SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double)
|
||||||
|
|
||||||
#define SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \
|
#define SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \
|
||||||
template<> \
|
template<> \
|
||||||
@ -1021,7 +1021,7 @@ SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(mini, fmin)
|
|||||||
SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(maxi, max)
|
SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(maxi, max)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax)
|
SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax)
|
||||||
|
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
@ -1104,9 +1104,9 @@ inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar&
|
|||||||
return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
|
return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(hypot, hypot)
|
SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(hypot, hypot)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@ -1115,9 +1115,9 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
|
|||||||
return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
|
return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log1p, log1p)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log1p, log1p)
|
||||||
#endif //defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1134,19 +1134,19 @@ inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const Scala
|
|||||||
return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
|
return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(pow, pow)
|
SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(pow, pow)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
|
template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
|
||||||
template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); }
|
template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); }
|
||||||
template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
|
template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); }
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isnan, isnan, bool)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isnan, isnan, bool)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
template<typename Scalar>
|
template<typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@ -1155,9 +1155,9 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x)
|
|||||||
return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
|
return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
@ -1167,9 +1167,9 @@ T (floor)(const T& x)
|
|||||||
return floor(x);
|
return floor(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1187,9 +1187,9 @@ T (ceil)(const T& x)
|
|||||||
return ceil(x);
|
return ceil(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1232,9 +1232,9 @@ T sqrt(const T &x)
|
|||||||
return sqrt(x);
|
return sqrt(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1243,9 +1243,9 @@ T log(const T &x) {
|
|||||||
return log(x);
|
return log(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log, log)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log, log)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
@ -1271,10 +1271,10 @@ abs(const T &x) {
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(abs, abs)
|
SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(abs, abs)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(abs, fabs)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(abs, fabs)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1301,9 +1301,9 @@ T exp(const T &x) {
|
|||||||
return exp(x);
|
return exp(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1336,9 +1336,9 @@ inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x)
|
|||||||
return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x);
|
return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(expm1, expm1)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(expm1, expm1)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1355,9 +1355,9 @@ T cos(const T &x) {
|
|||||||
return cos(x);
|
return cos(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cos,cos)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cos,cos)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1374,9 +1374,9 @@ T sin(const T &x) {
|
|||||||
return sin(x);
|
return sin(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sin, sin)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sin, sin)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1393,9 +1393,9 @@ T tan(const T &x) {
|
|||||||
return tan(x);
|
return tan(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tan, tan)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tan, tan)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1421,10 +1421,10 @@ T acosh(const T &x) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acosh, acosh)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acosh, acosh)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1450,10 +1450,10 @@ T asinh(const T &x) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asinh, asinh)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asinh, asinh)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1479,10 +1479,10 @@ T atanh(const T &x) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atanh, atanh)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atanh, atanh)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1500,9 +1500,9 @@ T cosh(const T &x) {
|
|||||||
return cosh(x);
|
return cosh(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cosh, cosh)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cosh, cosh)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1519,9 +1519,9 @@ T sinh(const T &x) {
|
|||||||
return sinh(x);
|
return sinh(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sinh, sinh)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sinh, sinh)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1538,14 +1538,14 @@ T tanh(const T &x) {
|
|||||||
return tanh(x);
|
return tanh(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && (!defined(__SYCL_DEVICE_ONLY__))
|
#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY)
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
float tanh(float x) { return internal::generic_fast_tanh_float(x); }
|
float tanh(float x) { return internal::generic_fast_tanh_float(x); }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tanh, tanh)
|
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tanh, tanh)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
@ -1562,9 +1562,9 @@ T fmod(const T& a, const T& b) {
|
|||||||
return fmod(a, b);
|
return fmod(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(fmod, fmod)
|
SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(fmod, fmod)
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
#if defined(EIGEN_GPUCC)
|
#if defined(EIGEN_GPUCC)
|
||||||
template <>
|
template <>
|
||||||
@ -1580,7 +1580,7 @@ double fmod(const double& a, const double& b) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY
|
#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY
|
||||||
#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY
|
#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY
|
||||||
#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY
|
#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY
|
||||||
@ -1595,7 +1595,7 @@ double fmod(const double& a, const double& b) {
|
|||||||
#undef SYCL_SPECIALIZE_GEN1_BINARY_FUNC
|
#undef SYCL_SPECIALIZE_GEN1_BINARY_FUNC
|
||||||
#undef SYCL_SPECIALIZE_GEN2_BINARY_FUNC
|
#undef SYCL_SPECIALIZE_GEN2_BINARY_FUNC
|
||||||
#undef SYCL_SPECIALIZE_BINARY_FUNC
|
#undef SYCL_SPECIALIZE_BINARY_FUNC
|
||||||
#endif // defined(__SYCL_DEVICE_ONLY__)
|
#endif
|
||||||
|
|
||||||
} // end namespace numext
|
} // end namespace numext
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ struct __half_raw {
|
|||||||
typedef __half __half_raw;
|
typedef __half __half_raw;
|
||||||
#endif // defined(EIGEN_HAS_CUDA_FP16)
|
#endif // defined(EIGEN_HAS_CUDA_FP16)
|
||||||
|
|
||||||
#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
|
#elif defined(SYCL_DEVICE_ONLY)
|
||||||
typedef cl::sycl::half __half_raw;
|
typedef cl::sycl::half __half_raw;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -16,58 +16,67 @@
|
|||||||
* \brief:
|
* \brief:
|
||||||
* InteropHeaders
|
* InteropHeaders
|
||||||
*
|
*
|
||||||
*****************************************************************/
|
*****************************************************************/
|
||||||
|
|
||||||
#ifndef EIGEN_INTEROP_HEADERS_SYCL_H
|
#ifndef EIGEN_INTEROP_HEADERS_SYCL_H
|
||||||
#define EIGEN_INTEROP_HEADERS_SYCL_H
|
#define EIGEN_INTEROP_HEADERS_SYCL_H
|
||||||
#if defined EIGEN_USE_SYCL
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
|
#if !defined(EIGEN_DONT_VECTORIZE_SYCL)
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
#define SYCL_PACKET_TRAITS(packet_type, val, unpacket_type, lengths)\
|
|
||||||
template<> struct packet_traits<unpacket_type> : default_packet_traits\
|
template <int has_blend, int lengths>
|
||||||
{\
|
struct sycl_packet_traits : default_packet_traits {
|
||||||
typedef packet_type type;\
|
enum {
|
||||||
typedef packet_type half;\
|
Vectorizable = 1,
|
||||||
enum {\
|
AlignedOnScalar = 1,
|
||||||
Vectorizable = 1,\
|
size = lengths,
|
||||||
AlignedOnScalar = 1,\
|
HasHalfPacket = 0,
|
||||||
size=lengths,\
|
HasDiv = 1,
|
||||||
HasHalfPacket = 0,\
|
HasLog = 1,
|
||||||
HasDiv = 1,\
|
HasExp = 1,
|
||||||
HasLog = 1,\
|
HasSqrt = 1,
|
||||||
HasExp = 1,\
|
HasRsqrt = 1,
|
||||||
HasSqrt = 1,\
|
HasSin = 1,
|
||||||
HasRsqrt = 1,\
|
HasCos = 1,
|
||||||
HasSin = 1,\
|
HasTan = 1,
|
||||||
HasCos = 1,\
|
HasASin = 1,
|
||||||
HasTan = 1,\
|
HasACos = 1,
|
||||||
HasASin = 1,\
|
HasATan = 1,
|
||||||
HasACos = 1,\
|
HasSinh = 1,
|
||||||
HasATan = 1,\
|
HasCosh = 1,
|
||||||
HasSinh = 1,\
|
HasTanh = 1,
|
||||||
HasCosh = 1,\
|
HasLGamma = 0,
|
||||||
HasTanh = 1,\
|
HasDiGamma = 0,
|
||||||
HasLGamma = 0,\
|
HasZeta = 0,
|
||||||
HasDiGamma = 0,\
|
HasPolygamma = 0,
|
||||||
HasZeta = 0,\
|
HasErf = 0,
|
||||||
HasPolygamma = 0,\
|
HasErfc = 0,
|
||||||
HasErf = 0,\
|
HasIGamma = 0,
|
||||||
HasErfc = 0,\
|
HasIGammac = 0,
|
||||||
HasIGamma = 0,\
|
HasBetaInc = 0,
|
||||||
HasIGammac = 0,\
|
HasBlend = has_blend,
|
||||||
HasBetaInc = 0,\
|
HasMax = 1,
|
||||||
HasBlend = val,\
|
HasMin = 1,
|
||||||
HasMax=1,\
|
HasMul = 1,
|
||||||
HasMin=1,\
|
HasAdd = 1,
|
||||||
HasMul=1,\
|
HasFloor = 1,
|
||||||
HasAdd=1,\
|
HasRound = 1,
|
||||||
HasFloor=1,\
|
HasLog1p = 1,
|
||||||
HasRound=1,\
|
HasExpm1 = 1,
|
||||||
HasLog1p=1,\
|
HasCeil = 1,
|
||||||
HasExpm1=1,\
|
};
|
||||||
HasCeil=1,\
|
};
|
||||||
};\
|
|
||||||
|
#ifdef SYCL_DEVICE_ONLY
|
||||||
|
#define SYCL_PACKET_TRAITS(packet_type, has_blend, unpacket_type, lengths) \
|
||||||
|
template <> \
|
||||||
|
struct packet_traits<unpacket_type> \
|
||||||
|
: sycl_packet_traits<has_blend, lengths> { \
|
||||||
|
typedef packet_type type; \
|
||||||
|
typedef packet_type half; \
|
||||||
};
|
};
|
||||||
|
|
||||||
SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4)
|
SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4)
|
||||||
@ -76,29 +85,137 @@ SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2)
|
|||||||
SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2)
|
SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2)
|
||||||
#undef SYCL_PACKET_TRAITS
|
#undef SYCL_PACKET_TRAITS
|
||||||
|
|
||||||
|
|
||||||
// Make sure this is only available when targeting a GPU: we don't want to
|
// Make sure this is only available when targeting a GPU: we don't want to
|
||||||
// introduce conflicts between these packet_traits definitions and the ones
|
// introduce conflicts between these packet_traits definitions and the ones
|
||||||
// we'll use on the host side (SSE, AVX, ...)
|
// we'll use on the host side (SSE, AVX, ...)
|
||||||
#define SYCL_ARITHMETIC(packet_type) template<> struct is_arithmetic<packet_type> { enum { value = true }; };
|
#define SYCL_ARITHMETIC(packet_type) \
|
||||||
|
template <> \
|
||||||
|
struct is_arithmetic<packet_type> { \
|
||||||
|
enum { value = true }; \
|
||||||
|
};
|
||||||
SYCL_ARITHMETIC(cl::sycl::cl_float4)
|
SYCL_ARITHMETIC(cl::sycl::cl_float4)
|
||||||
SYCL_ARITHMETIC(cl::sycl::cl_double2)
|
SYCL_ARITHMETIC(cl::sycl::cl_double2)
|
||||||
#undef SYCL_ARITHMETIC
|
#undef SYCL_ARITHMETIC
|
||||||
|
|
||||||
#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\
|
#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths) \
|
||||||
template<> struct unpacket_traits<packet_type> {\
|
template <> \
|
||||||
typedef unpacket_type type;\
|
struct unpacket_traits<packet_type> { \
|
||||||
enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\
|
typedef unpacket_type type; \
|
||||||
typedef packet_type half;\
|
enum { size = lengths, vectorizable = true, alignment = Aligned16 }; \
|
||||||
};
|
typedef packet_type half; \
|
||||||
|
};
|
||||||
SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
|
SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
|
||||||
SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2)
|
SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2)
|
||||||
|
|
||||||
#undef SYCL_UNPACKET_TRAITS
|
#undef SYCL_UNPACKET_TRAITS
|
||||||
|
#endif
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
#endif
|
||||||
|
|
||||||
#endif // EIGEN_USE_SYCL
|
namespace TensorSycl {
|
||||||
#endif // EIGEN_INTEROP_HEADERS_SYCL_H
|
namespace internal {
|
||||||
|
|
||||||
|
template <typename PacketReturnType, int PacketSize>
|
||||||
|
struct PacketWrapper;
|
||||||
|
// This function should never get called on the device
|
||||||
|
#ifndef SYCL_DEVICE_ONLY
|
||||||
|
template <typename PacketReturnType, int PacketSize>
|
||||||
|
struct PacketWrapper {
|
||||||
|
typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
|
||||||
|
Scalar;
|
||||||
|
template <typename Index>
|
||||||
|
EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &) {
|
||||||
|
eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE");
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in,
|
||||||
|
Scalar) {
|
||||||
|
return ::Eigen::internal::template plset<PacketReturnType>(in);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType, Scalar *) {
|
||||||
|
eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE");
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#elif defined(SYCL_DEVICE_ONLY)
|
||||||
|
template <typename PacketReturnType>
|
||||||
|
struct PacketWrapper<PacketReturnType, 4> {
|
||||||
|
typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
|
||||||
|
Scalar;
|
||||||
|
template <typename Index>
|
||||||
|
EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) {
|
||||||
|
switch (index) {
|
||||||
|
case 0:
|
||||||
|
return in.x();
|
||||||
|
case 1:
|
||||||
|
return in.y();
|
||||||
|
case 2:
|
||||||
|
return in.z();
|
||||||
|
case 3:
|
||||||
|
return in.w();
|
||||||
|
default:
|
||||||
|
eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 3");
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(
|
||||||
|
Scalar in, Scalar other) {
|
||||||
|
return PacketReturnType(in, other, other, other);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||||
|
lhs = PacketReturnType(rhs[0], rhs[1], rhs[2], rhs[3]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename PacketReturnType>
|
||||||
|
struct PacketWrapper<PacketReturnType, 1> {
|
||||||
|
typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
|
||||||
|
Scalar;
|
||||||
|
template <typename Index>
|
||||||
|
EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &in) {
|
||||||
|
return in;
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in,
|
||||||
|
Scalar) {
|
||||||
|
return PacketReturnType(in);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||||
|
lhs = rhs[0];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename PacketReturnType>
|
||||||
|
struct PacketWrapper<PacketReturnType, 2> {
|
||||||
|
typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
|
||||||
|
Scalar;
|
||||||
|
template <typename Index>
|
||||||
|
EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) {
|
||||||
|
switch (index) {
|
||||||
|
case 0:
|
||||||
|
return in.x();
|
||||||
|
case 1:
|
||||||
|
return in.y();
|
||||||
|
default:
|
||||||
|
eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 1");
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(
|
||||||
|
Scalar in, Scalar other) {
|
||||||
|
return PacketReturnType(in, other);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
|
||||||
|
lhs = PacketReturnType(rhs[0], rhs[1]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
} // end namespace TensorSycl
|
||||||
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
#endif // EIGEN_INTEROP_HEADERS_SYCL_H
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
* \brief:
|
* \brief:
|
||||||
* MathFunctions
|
* MathFunctions
|
||||||
*
|
*
|
||||||
*****************************************************************/
|
*****************************************************************/
|
||||||
|
|
||||||
#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H
|
#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H
|
||||||
#define EIGEN_MATH_FUNCTIONS_SYCL_H
|
#define EIGEN_MATH_FUNCTIONS_SYCL_H
|
||||||
@ -28,194 +28,251 @@ namespace internal {
|
|||||||
// Make sure this is only available when targeting a GPU: we don't want to
|
// Make sure this is only available when targeting a GPU: we don't want to
|
||||||
// introduce conflicts between these packet_traits definitions and the ones
|
// introduce conflicts between these packet_traits definitions and the ones
|
||||||
// we'll use on the host side (SSE, AVX, ...)
|
// we'll use on the host side (SSE, AVX, ...)
|
||||||
//#if defined(__SYCL_DEVICE_ONLY__) && defined(EIGEN_USE_SYCL)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
#define SYCL_PLOG(packet_type) \
|
#define SYCL_PLOG(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type plog<packet_type>(const packet_type& a) { return cl::sycl::log(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::log(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PLOG(cl::sycl::cl_float4)
|
SYCL_PLOG(cl::sycl::cl_float4)
|
||||||
SYCL_PLOG(cl::sycl::cl_double2)
|
SYCL_PLOG(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PLOG
|
#undef SYCL_PLOG
|
||||||
|
|
||||||
#define SYCL_PLOG1P(packet_type) \
|
#define SYCL_PLOG1P(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type plog1p<packet_type>(const packet_type& a) { return cl::sycl::log1p(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog1p<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::log1p(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PLOG1P(cl::sycl::cl_float4)
|
SYCL_PLOG1P(cl::sycl::cl_float4)
|
||||||
SYCL_PLOG1P(cl::sycl::cl_double2)
|
SYCL_PLOG1P(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PLOG1P
|
#undef SYCL_PLOG1P
|
||||||
|
|
||||||
#define SYCL_PLOG10(packet_type) \
|
#define SYCL_PLOG10(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type plog10<packet_type>(const packet_type& a) { return cl::sycl::log10(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog10<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::log10(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PLOG10(cl::sycl::cl_float4)
|
SYCL_PLOG10(cl::sycl::cl_float4)
|
||||||
SYCL_PLOG10(cl::sycl::cl_double2)
|
SYCL_PLOG10(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PLOG10
|
#undef SYCL_PLOG10
|
||||||
|
|
||||||
#define SYCL_PEXP(packet_type) \
|
#define SYCL_PEXP(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pexp<packet_type>(const packet_type& a) { return cl::sycl::exp(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexp<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::exp(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PEXP(cl::sycl::cl_float4)
|
SYCL_PEXP(cl::sycl::cl_float4)
|
||||||
SYCL_PEXP(cl::sycl::cl_double2)
|
SYCL_PEXP(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PEXP
|
#undef SYCL_PEXP
|
||||||
|
|
||||||
#define SYCL_PEXPM1(packet_type) \
|
#define SYCL_PEXPM1(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pexpm1<packet_type>(const packet_type& a) { return cl::sycl::expm1(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexpm1<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::expm1(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PEXPM1(cl::sycl::cl_float4)
|
SYCL_PEXPM1(cl::sycl::cl_float4)
|
||||||
SYCL_PEXPM1(cl::sycl::cl_double2)
|
SYCL_PEXPM1(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PEXPM1
|
#undef SYCL_PEXPM1
|
||||||
|
|
||||||
#define SYCL_PSQRT(packet_type) \
|
#define SYCL_PSQRT(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type psqrt<packet_type>(const packet_type& a) { return cl::sycl::sqrt(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psqrt<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::sqrt(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PSQRT(cl::sycl::cl_float4)
|
SYCL_PSQRT(cl::sycl::cl_float4)
|
||||||
SYCL_PSQRT(cl::sycl::cl_double2)
|
SYCL_PSQRT(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PSQRT
|
#undef SYCL_PSQRT
|
||||||
|
|
||||||
|
#define SYCL_PRSQRT(packet_type) \
|
||||||
#define SYCL_PRSQRT(packet_type) \
|
template <> \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type prsqrt<packet_type>( \
|
||||||
packet_type prsqrt<packet_type>(const packet_type& a) { return cl::sycl::rsqrt(a); }
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::rsqrt(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PRSQRT(cl::sycl::cl_float4)
|
SYCL_PRSQRT(cl::sycl::cl_float4)
|
||||||
SYCL_PRSQRT(cl::sycl::cl_double2)
|
SYCL_PRSQRT(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PRSQRT
|
#undef SYCL_PRSQRT
|
||||||
|
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
||||||
#define SYCL_PSIN(packet_type) \
|
#define SYCL_PSIN(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type psin<packet_type>(const packet_type& a) { return cl::sycl::sin(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psin<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::sin(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PSIN(cl::sycl::cl_float4)
|
SYCL_PSIN(cl::sycl::cl_float4)
|
||||||
SYCL_PSIN(cl::sycl::cl_double2)
|
SYCL_PSIN(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PSIN
|
#undef SYCL_PSIN
|
||||||
|
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
||||||
#define SYCL_PCOS(packet_type) \
|
#define SYCL_PCOS(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pcos<packet_type>(const packet_type& a) { return cl::sycl::cos(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcos<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::cos(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PCOS(cl::sycl::cl_float4)
|
SYCL_PCOS(cl::sycl::cl_float4)
|
||||||
SYCL_PCOS(cl::sycl::cl_double2)
|
SYCL_PCOS(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PCOS
|
#undef SYCL_PCOS
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
||||||
#define SYCL_PTAN(packet_type) \
|
#define SYCL_PTAN(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type ptan<packet_type>(const packet_type& a) { return cl::sycl::tan(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptan<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::tan(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PTAN(cl::sycl::cl_float4)
|
SYCL_PTAN(cl::sycl::cl_float4)
|
||||||
SYCL_PTAN(cl::sycl::cl_double2)
|
SYCL_PTAN(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PTAN
|
#undef SYCL_PTAN
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
||||||
#define SYCL_PASIN(packet_type) \
|
#define SYCL_PASIN(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pasin<packet_type>(const packet_type& a) { return cl::sycl::asin(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pasin<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::asin(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PASIN(cl::sycl::cl_float4)
|
SYCL_PASIN(cl::sycl::cl_float4)
|
||||||
SYCL_PASIN(cl::sycl::cl_double2)
|
SYCL_PASIN(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PASIN
|
#undef SYCL_PASIN
|
||||||
|
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
||||||
#define SYCL_PACOS(packet_type) \
|
#define SYCL_PACOS(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pacos<packet_type>(const packet_type& a) { return cl::sycl::acos(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pacos<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::acos(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PACOS(cl::sycl::cl_float4)
|
SYCL_PACOS(cl::sycl::cl_float4)
|
||||||
SYCL_PACOS(cl::sycl::cl_double2)
|
SYCL_PACOS(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PACOS
|
#undef SYCL_PACOS
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
||||||
#define SYCL_PATAN(packet_type) \
|
#define SYCL_PATAN(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type patan<packet_type>(const packet_type& a) { return cl::sycl::atan(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type patan<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::atan(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PATAN(cl::sycl::cl_float4)
|
SYCL_PATAN(cl::sycl::cl_float4)
|
||||||
SYCL_PATAN(cl::sycl::cl_double2)
|
SYCL_PATAN(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PATAN
|
#undef SYCL_PATAN
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
||||||
#define SYCL_PSINH(packet_type) \
|
#define SYCL_PSINH(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type psinh<packet_type>(const packet_type& a) { return cl::sycl::sinh(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psinh<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::sinh(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PSINH(cl::sycl::cl_float4)
|
SYCL_PSINH(cl::sycl::cl_float4)
|
||||||
SYCL_PSINH(cl::sycl::cl_double2)
|
SYCL_PSINH(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PSINH
|
#undef SYCL_PSINH
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
|
||||||
#define SYCL_PCOSH(packet_type) \
|
#define SYCL_PCOSH(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pcosh<packet_type>(const packet_type& a) { return cl::sycl::cosh(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcosh<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::cosh(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PCOSH(cl::sycl::cl_float4)
|
SYCL_PCOSH(cl::sycl::cl_float4)
|
||||||
SYCL_PCOSH(cl::sycl::cl_double2)
|
SYCL_PCOSH(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PCOSH
|
#undef SYCL_PCOSH
|
||||||
|
|
||||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
||||||
#define SYCL_PTANH(packet_type) \
|
#define SYCL_PTANH(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type ptanh<packet_type>(const packet_type& a) { return cl::sycl::tanh(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptanh<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::tanh(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PTANH(cl::sycl::cl_float4)
|
SYCL_PTANH(cl::sycl::cl_float4)
|
||||||
SYCL_PTANH(cl::sycl::cl_double2)
|
SYCL_PTANH(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PTANH
|
#undef SYCL_PTANH
|
||||||
|
|
||||||
#define SYCL_PCEIL(packet_type) \
|
#define SYCL_PCEIL(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pceil<packet_type>(const packet_type& a) { return cl::sycl::ceil(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pceil<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::ceil(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PCEIL(cl::sycl::cl_float4)
|
SYCL_PCEIL(cl::sycl::cl_float4)
|
||||||
SYCL_PCEIL(cl::sycl::cl_double2)
|
SYCL_PCEIL(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PCEIL
|
#undef SYCL_PCEIL
|
||||||
|
|
||||||
|
#define SYCL_PROUND(packet_type) \
|
||||||
#define SYCL_PROUND(packet_type) \
|
template <> \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pround<packet_type>( \
|
||||||
packet_type pround<packet_type>(const packet_type& a) { return cl::sycl::round(a); }
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::round(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PROUND(cl::sycl::cl_float4)
|
SYCL_PROUND(cl::sycl::cl_float4)
|
||||||
SYCL_PROUND(cl::sycl::cl_double2)
|
SYCL_PROUND(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PROUND
|
#undef SYCL_PROUND
|
||||||
|
|
||||||
#define SYCL_FLOOR(packet_type) \
|
#define SYCL_FLOOR(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pfloor<packet_type>(const packet_type& a) { return cl::sycl::floor(a); }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pfloor<packet_type>( \
|
||||||
|
const packet_type& a) { \
|
||||||
|
return cl::sycl::floor(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_FLOOR(cl::sycl::cl_float4)
|
SYCL_FLOOR(cl::sycl::cl_float4)
|
||||||
SYCL_FLOOR(cl::sycl::cl_double2)
|
SYCL_FLOOR(cl::sycl::cl_double2)
|
||||||
#undef SYCL_FLOOR
|
#undef SYCL_FLOOR
|
||||||
|
|
||||||
|
#define SYCL_PMIN(packet_type, expr) \
|
||||||
#define SYCL_PMIN(packet_type, expr) \
|
template <> \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmin<packet_type>( \
|
||||||
packet_type pmin<packet_type>(const packet_type& a, const packet_type& b) { return expr; }
|
const packet_type& a, const packet_type& b) { \
|
||||||
|
return expr; \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b))
|
SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b))
|
||||||
SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b))
|
SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b))
|
||||||
#undef SYCL_PMIN
|
#undef SYCL_PMIN
|
||||||
|
|
||||||
#define SYCL_PMAX(packet_type, expr) \
|
#define SYCL_PMAX(packet_type, expr) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
|
template <> \
|
||||||
packet_type pmax<packet_type>(const packet_type& a, const packet_type& b) { return expr; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmax<packet_type>( \
|
||||||
|
const packet_type& a, const packet_type& b) { \
|
||||||
|
return expr; \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b))
|
SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b))
|
||||||
SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b))
|
SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b))
|
||||||
#undef SYCL_PMAX
|
#undef SYCL_PMAX
|
||||||
|
|
||||||
//#endif
|
#endif
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_MATH_FUNCTIONS_CUDA_H
|
#endif // EIGEN_MATH_FUNCTIONS_SYCL_H
|
||||||
|
@ -16,85 +16,122 @@
|
|||||||
* \brief:
|
* \brief:
|
||||||
* PacketMath
|
* PacketMath
|
||||||
*
|
*
|
||||||
*****************************************************************/
|
*****************************************************************/
|
||||||
|
|
||||||
#ifndef EIGEN_PACKET_MATH_SYCL_H
|
#ifndef EIGEN_PACKET_MATH_SYCL_H
|
||||||
#define EIGEN_PACKET_MATH_SYCL_H
|
#define EIGEN_PACKET_MATH_SYCL_H
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#if defined EIGEN_USE_SYCL
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
#ifdef SYCL_DEVICE_ONLY
|
||||||
|
|
||||||
#define SYCL_PLOADT_RO(address_space_target)\
|
#define SYCL_PLOADT_RO(address_space_target) \
|
||||||
template<typename packet_type, int Alignment>\
|
template <typename packet_type, int Alignment> \
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \
|
||||||
ploadt_ro(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
|
typename cl::sycl::multi_ptr< \
|
||||||
cl::sycl::access::address_space::address_space_target>::pointer_t from) {\
|
const typename unpacket_traits<packet_type>::type, \
|
||||||
typedef typename unpacket_traits<packet_type>::type scalar;\
|
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
||||||
typedef cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target> multi_ptr;\
|
from) { \
|
||||||
auto res=packet_type(static_cast<typename unpacket_traits<packet_type>::type>(0));\
|
typedef typename unpacket_traits<packet_type>::type scalar; \
|
||||||
res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from)));\
|
typedef cl::sycl::multi_ptr< \
|
||||||
return res;\
|
scalar, cl::sycl::access::address_space::address_space_target> \
|
||||||
}
|
multi_ptr; \
|
||||||
|
auto res = packet_type( \
|
||||||
|
static_cast<typename unpacket_traits<packet_type>::type>(0)); \
|
||||||
|
res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
|
||||||
|
return res; \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PLOADT_RO(global_space)
|
SYCL_PLOADT_RO(global_space)
|
||||||
SYCL_PLOADT_RO(local_space)
|
SYCL_PLOADT_RO(local_space)
|
||||||
|
|
||||||
#undef SYCL_PLOADT_RO
|
#undef SYCL_PLOADT_RO
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <typename packet_type, int Alignment, typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
|
||||||
|
ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
|
||||||
|
cl::sycl::access::mode::read_write, T>& from) {
|
||||||
|
return ploadt_ro<packet_type, Alignment>(from.get_pointer());
|
||||||
|
}
|
||||||
|
|
||||||
#define SYCL_PLOAD(address_space_target, Alignment, AlignedType)\
|
#ifdef SYCL_DEVICE_ONLY
|
||||||
template<typename packet_type> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
|
#define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \
|
||||||
pload##AlignedType(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
|
template <typename packet_type> \
|
||||||
cl::sycl::access::address_space::address_space_target>::pointer_t from) {\
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
|
||||||
return ploadt_ro<packet_type, Alignment>(from);\
|
typename cl::sycl::multi_ptr< \
|
||||||
}
|
const typename unpacket_traits<packet_type>::type, \
|
||||||
|
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
||||||
|
from) { \
|
||||||
|
return ploadt_ro<packet_type, Alignment>(from); \
|
||||||
|
}
|
||||||
|
|
||||||
// global space
|
// global space
|
||||||
SYCL_PLOAD(global_space, Unaligned, u)
|
SYCL_PLOAD(global_space, Unaligned, u)
|
||||||
SYCL_PLOAD(global_space, Aligned, )
|
SYCL_PLOAD(global_space, Aligned, )
|
||||||
|
|
||||||
// local space
|
// local space
|
||||||
SYCL_PLOAD(local_space, Unaligned, u)
|
SYCL_PLOAD(local_space, Unaligned, u)
|
||||||
SYCL_PLOAD(local_space, Aligned, )
|
SYCL_PLOAD(local_space, Aligned, )
|
||||||
|
|
||||||
// private space
|
#undef SYCL_PLOAD
|
||||||
//SYCL_PLOAD(private_space, Unaligned, u)
|
#endif
|
||||||
//SYCL_PLOAD(private_space, Aligned, )
|
|
||||||
|
|
||||||
|
#define SYCL_PLOAD(Alignment, AlignedType) \
|
||||||
|
template <typename packet_type> \
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
|
||||||
|
const Eigen::TensorSycl::internal::RangeAccess< \
|
||||||
|
cl::sycl::access::mode::read_write, \
|
||||||
|
typename unpacket_traits<packet_type>::type> \
|
||||||
|
from) { \
|
||||||
|
return ploadt_ro<packet_type, Alignment>(from); \
|
||||||
|
}
|
||||||
|
SYCL_PLOAD(Unaligned, u)
|
||||||
|
SYCL_PLOAD(Aligned, )
|
||||||
#undef SYCL_PLOAD
|
#undef SYCL_PLOAD
|
||||||
|
|
||||||
|
#ifdef SYCL_DEVICE_ONLY
|
||||||
/** \internal \returns a packet version of \a *from.
|
/** \internal \returns a packet version of \a *from.
|
||||||
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
* The pointer \a from must be aligned on a \a Alignment bytes boundary. */
|
||||||
#define SYCL_PLOADT(address_space_target)\
|
#define SYCL_PLOADT(address_space_target) \
|
||||||
template<typename packet_type, int Alignment>\
|
template <typename packet_type, int Alignment> \
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(\
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \
|
||||||
typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
|
typename cl::sycl::multi_ptr< \
|
||||||
cl::sycl::access::address_space::address_space_target>::pointer_t from)\
|
const typename unpacket_traits<packet_type>::type, \
|
||||||
{\
|
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
||||||
if(Alignment >= unpacket_traits<packet_type>::alignment)\
|
from) { \
|
||||||
return pload<packet_type>(from);\
|
if (Alignment >= unpacket_traits<packet_type>::alignment) \
|
||||||
else\
|
return pload<packet_type>(from); \
|
||||||
return ploadu<packet_type>(from);\
|
else \
|
||||||
}
|
return ploadu<packet_type>(from); \
|
||||||
|
}
|
||||||
|
|
||||||
// global space
|
// global space
|
||||||
SYCL_PLOADT(global_space)
|
SYCL_PLOADT(global_space)
|
||||||
// local space
|
// local space
|
||||||
SYCL_PLOADT(local_space)
|
SYCL_PLOADT(local_space)
|
||||||
|
#undef SYCL_PLOADT
|
||||||
|
#endif
|
||||||
|
|
||||||
//private_space
|
template <typename packet_type, int Alignment>
|
||||||
// There is no need to specialise it for private space as it can use the GenericPacketMath version
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
|
||||||
|
ploadt(const Eigen::TensorSycl::internal::RangeAccess<
|
||||||
|
cl::sycl::access::mode::read_write,
|
||||||
|
typename unpacket_traits<packet_type>::type>& from) {
|
||||||
|
return ploadt<packet_type, Alignment>(from.get_pointer());
|
||||||
|
}
|
||||||
|
#ifdef SYCL_DEVICE_ONLY
|
||||||
|
|
||||||
#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)\
|
// private_space
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
|
#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \
|
||||||
ploadt_ro<packet_type, Alignment>(const typename unpacket_traits<packet_type>::type * from) { \
|
template <> \
|
||||||
typedef typename unpacket_traits<packet_type>::type scalar;\
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \
|
||||||
auto res=packet_type(static_cast<scalar>(0));\
|
ploadt_ro<packet_type, Alignment>( \
|
||||||
res. template load<cl::sycl::access::address_space::private_space>(0, const_cast<scalar*>(from));\
|
const typename unpacket_traits<packet_type>::type* from) { \
|
||||||
return res;\
|
typedef typename unpacket_traits<packet_type>::type scalar; \
|
||||||
|
auto res = packet_type(static_cast<scalar>(0)); \
|
||||||
|
res.template load<cl::sycl::access::address_space::private_space>( \
|
||||||
|
0, const_cast<scalar*>(from)); \
|
||||||
|
return res; \
|
||||||
}
|
}
|
||||||
|
|
||||||
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
|
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
|
||||||
@ -102,37 +139,42 @@ SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
|
|||||||
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
|
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
|
||||||
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
|
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
|
||||||
|
|
||||||
|
#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \
|
||||||
#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)\
|
template <> \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \
|
||||||
pload##alignment_type(const typename unpacket_traits<packet_type>::type * from) { \
|
const typename unpacket_traits<packet_type>::type* from) { \
|
||||||
typedef typename unpacket_traits<packet_type>::type scalar;\
|
typedef typename unpacket_traits<packet_type>::type scalar; \
|
||||||
auto res=packet_type(static_cast<scalar>(0));\
|
auto res = packet_type(static_cast<scalar>(0)); \
|
||||||
res. template load<cl::sycl::access::address_space::private_space>(0, const_cast<scalar*>(from));\
|
res.template load<cl::sycl::access::address_space::private_space>( \
|
||||||
return res;\
|
0, const_cast<scalar*>(from)); \
|
||||||
|
return res; \
|
||||||
}
|
}
|
||||||
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4,)
|
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
|
||||||
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2,)
|
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
|
||||||
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
|
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
|
||||||
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
|
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
|
||||||
|
|
||||||
#undef SYCL_PLOAD_SPECIAL
|
#undef SYCL_PLOAD_SPECIAL
|
||||||
|
|
||||||
#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)\
|
#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \
|
||||||
template<>\
|
template <> \
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
|
||||||
typename cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target>::pointer_t to, \
|
typename cl::sycl::multi_ptr< \
|
||||||
const packet_type& from) {\
|
scalar, \
|
||||||
typedef cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target> multi_ptr;\
|
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
||||||
from.store(0, multi_ptr(to));\
|
to, \
|
||||||
}
|
const packet_type& from) { \
|
||||||
|
typedef cl::sycl::multi_ptr< \
|
||||||
|
scalar, cl::sycl::access::address_space::address_space_target> \
|
||||||
|
multi_ptr; \
|
||||||
|
from.store(0, multi_ptr(to)); \
|
||||||
|
}
|
||||||
|
|
||||||
// global space
|
// global space
|
||||||
SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
|
SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
|
||||||
SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
|
SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
|
||||||
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
|
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
|
||||||
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
|
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
|
||||||
|
|
||||||
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
|
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
|
||||||
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
|
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
|
||||||
SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
|
SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
|
||||||
@ -142,36 +184,34 @@ SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
|
|||||||
SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
|
SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
|
||||||
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
|
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
|
||||||
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
|
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
|
||||||
|
#undef SYCL_PSTORE
|
||||||
|
|
||||||
|
#define SYCL_PSTORE_T(address_space_target) \
|
||||||
|
template <typename scalar, typename packet_type, int Alignment> \
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \
|
||||||
|
typename cl::sycl::multi_ptr< \
|
||||||
|
scalar, \
|
||||||
|
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
||||||
|
to, \
|
||||||
|
const packet_type& from) { \
|
||||||
|
if (Alignment) \
|
||||||
|
pstore(to, from); \
|
||||||
|
else \
|
||||||
|
pstoreu(to, from); \
|
||||||
|
}
|
||||||
|
|
||||||
#define SYCL_PSTORE_T(scalar, packet_type, Alignment)\
|
SYCL_PSTORE_T(global_space)
|
||||||
template<>\
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret<scalar, packet_type, Alignment>(\
|
|
||||||
scalar* to,\
|
|
||||||
const packet_type& from) {\
|
|
||||||
if(Alignment)\
|
|
||||||
pstore(to, from);\
|
|
||||||
else\
|
|
||||||
pstoreu(to,from);\
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
SYCL_PSTORE_T(float, cl::sycl::cl_float4, Aligned)
|
|
||||||
|
|
||||||
SYCL_PSTORE_T(float, cl::sycl::cl_float4, Unaligned)
|
|
||||||
|
|
||||||
SYCL_PSTORE_T(double, cl::sycl::cl_double2, Aligned)
|
|
||||||
|
|
||||||
SYCL_PSTORE_T(double, cl::sycl::cl_double2, Unaligned)
|
|
||||||
|
|
||||||
|
SYCL_PSTORE_T(local_space)
|
||||||
|
|
||||||
#undef SYCL_PSTORE_T
|
#undef SYCL_PSTORE_T
|
||||||
|
|
||||||
#define SYCL_PSET1(packet_type)\
|
#define SYCL_PSET1(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>(\
|
template <> \
|
||||||
const typename unpacket_traits<packet_type>::type& from) {\
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
|
||||||
return packet_type(from);\
|
const typename unpacket_traits<packet_type>::type& from) { \
|
||||||
}
|
return packet_type(from); \
|
||||||
|
}
|
||||||
|
|
||||||
// global space
|
// global space
|
||||||
SYCL_PSET1(cl::sycl::cl_float4)
|
SYCL_PSET1(cl::sycl::cl_float4)
|
||||||
@ -179,280 +219,343 @@ SYCL_PSET1(cl::sycl::cl_double2)
|
|||||||
|
|
||||||
#undef SYCL_PSET1
|
#undef SYCL_PSET1
|
||||||
|
|
||||||
|
template <typename packet_type>
|
||||||
template <typename packet_type> struct get_base_packet {
|
struct get_base_packet {
|
||||||
template <typename sycl_multi_pointer>
|
template <typename sycl_multi_pointer>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer ) {}
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
|
||||||
|
get_ploaddup(sycl_multi_pointer) {}
|
||||||
|
|
||||||
template <typename sycl_multi_pointer>
|
template <typename sycl_multi_pointer>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer , Index ) {}
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
|
||||||
|
get_pgather(sycl_multi_pointer, Index) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct get_base_packet <cl::sycl::cl_float4> {
|
template <>
|
||||||
|
struct get_base_packet<cl::sycl::cl_float4> {
|
||||||
template <typename sycl_multi_pointer>
|
template <typename sycl_multi_pointer>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
|
||||||
|
sycl_multi_pointer from) {
|
||||||
return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
|
return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
|
||||||
}
|
}
|
||||||
template <typename sycl_multi_pointer>
|
template <typename sycl_multi_pointer>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
|
||||||
return cl::sycl::cl_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
|
sycl_multi_pointer from, Index stride) {
|
||||||
|
return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
|
||||||
|
from[2 * stride], from[3 * stride]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename sycl_multi_pointer>
|
template <typename sycl_multi_pointer>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_float4& from, Index stride) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
|
||||||
|
sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
|
||||||
auto tmp = stride;
|
auto tmp = stride;
|
||||||
to[0] = from.x();
|
to[0] = from.x();
|
||||||
to[tmp] = from.y();
|
to[tmp] = from.y();
|
||||||
to[tmp += stride] = from.z();
|
to[tmp += stride] = from.z();
|
||||||
to[tmp += stride] = from.w();
|
to[tmp += stride] = from.w();
|
||||||
}
|
}
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
|
||||||
return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a+1), static_cast<float>(a+2), static_cast<float>(a+3));
|
const float& a) {
|
||||||
}
|
return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
|
||||||
|
static_cast<float>(a + 2),
|
||||||
|
static_cast<float>(a + 3));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct get_base_packet <cl::sycl::cl_double2> {
|
template <>
|
||||||
|
struct get_base_packet<cl::sycl::cl_double2> {
|
||||||
template <typename sycl_multi_pointer>
|
template <typename sycl_multi_pointer>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
|
||||||
|
get_ploaddup(const sycl_multi_pointer from) {
|
||||||
return cl::sycl::cl_double2(from[0], from[0]);
|
return cl::sycl::cl_double2(from[0], from[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename sycl_multi_pointer, typename Index>
|
template <typename sycl_multi_pointer, typename Index>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from, Index stride) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
|
||||||
return cl::sycl::cl_double2(from[0*stride], from[1*stride]);
|
const sycl_multi_pointer from, Index stride) {
|
||||||
|
return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename sycl_multi_pointer>
|
template <typename sycl_multi_pointer>
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_double2& from, Index stride) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
|
||||||
|
sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
|
||||||
to[0] = from.x();
|
to[0] = from.x();
|
||||||
to[stride] = from.y();
|
to[stride] = from.y();
|
||||||
}
|
}
|
||||||
|
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
|
||||||
return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
|
const double& a) {
|
||||||
|
return cl::sycl::cl_double2(static_cast<double>(a),
|
||||||
|
static_cast<double>(a + 1));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#define SYCL_PLOAD_DUP(address_space_target)\
|
#define SYCL_PLOAD_DUP(address_space_target) \
|
||||||
template<typename packet_type> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
|
template <typename packet_type> \
|
||||||
ploaddup(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \
|
||||||
cl::sycl::access::address_space::address_space_target>::pointer_t from)\
|
typename cl::sycl::multi_ptr< \
|
||||||
{\
|
const typename unpacket_traits<packet_type>::type, \
|
||||||
return get_base_packet<packet_type>::get_ploaddup(from); \
|
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
||||||
}
|
from) { \
|
||||||
|
return get_base_packet<packet_type>::get_ploaddup(from); \
|
||||||
|
}
|
||||||
|
|
||||||
// global space
|
// global space
|
||||||
SYCL_PLOAD_DUP(global_space)
|
SYCL_PLOAD_DUP(global_space)
|
||||||
// local_space
|
// local_space
|
||||||
SYCL_PLOAD_DUP(local_space)
|
SYCL_PLOAD_DUP(local_space)
|
||||||
// private_space
|
|
||||||
//SYCL_PLOAD_DUP(private_space)
|
|
||||||
#undef SYCL_PLOAD_DUP
|
#undef SYCL_PLOAD_DUP
|
||||||
|
|
||||||
#define SYCL_PLOAD_DUP_SPECILIZE(packet_type)\
|
#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
|
template <> \
|
||||||
ploaddup<packet_type>(const typename unpacket_traits<packet_type>::type * from)\
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
|
||||||
{ \
|
const typename unpacket_traits<packet_type>::type* from) { \
|
||||||
return get_base_packet<packet_type>::get_ploaddup(from); \
|
return get_base_packet<packet_type>::get_ploaddup(from); \
|
||||||
}
|
}
|
||||||
|
|
||||||
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
|
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
|
||||||
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
|
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
|
||||||
|
|
||||||
#undef SYCL_PLOAD_DUP_SPECILIZE
|
#undef SYCL_PLOAD_DUP_SPECILIZE
|
||||||
|
|
||||||
#define SYCL_PLSET(packet_type)\
|
#define SYCL_PLSET(packet_type) \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>(const typename unpacket_traits<packet_type>::type& a) {\
|
template <> \
|
||||||
return get_base_packet<packet_type>::set_plset(a);\
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
|
||||||
}
|
const typename unpacket_traits<packet_type>::type& a) { \
|
||||||
|
return get_base_packet<packet_type>::set_plset(a); \
|
||||||
|
}
|
||||||
|
|
||||||
SYCL_PLSET(cl::sycl::cl_float4)
|
SYCL_PLSET(cl::sycl::cl_float4)
|
||||||
SYCL_PLSET(cl::sycl::cl_double2)
|
SYCL_PLSET(cl::sycl::cl_double2)
|
||||||
|
|
||||||
#undef SYCL_PLSET
|
#undef SYCL_PLSET
|
||||||
|
|
||||||
|
#define SYCL_PGATHER(address_space_target) \
|
||||||
#define SYCL_PGATHER(address_space_target)\
|
template <typename Scalar, typename packet_type> \
|
||||||
template<typename Scalar, typename packet_type> EIGEN_DEVICE_FUNC inline packet_type pgather(\
|
EIGEN_DEVICE_FUNC inline packet_type pgather( \
|
||||||
typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
|
typename cl::sycl::multi_ptr< \
|
||||||
cl::sycl::access::address_space::address_space_target>::pointer_t from, Index stride) {\
|
const typename unpacket_traits<packet_type>::type, \
|
||||||
return get_base_packet<packet_type>::get_pgather(from, stride); \
|
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
||||||
}
|
from, \
|
||||||
|
Index stride) { \
|
||||||
|
return get_base_packet<packet_type>::get_pgather(from, stride); \
|
||||||
|
}
|
||||||
|
|
||||||
// global space
|
// global space
|
||||||
SYCL_PGATHER(global_space)
|
SYCL_PGATHER(global_space)
|
||||||
// local space
|
// local space
|
||||||
SYCL_PGATHER(local_space)
|
SYCL_PGATHER(local_space)
|
||||||
// private space
|
|
||||||
//SYCL_PGATHER(private_space)
|
|
||||||
|
|
||||||
#undef SYCL_PGATHER
|
#undef SYCL_PGATHER
|
||||||
|
|
||||||
|
#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
|
||||||
#define SYCL_PGATHER_SPECILIZE(scalar, packet_type)\
|
template <> \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
|
||||||
pgather<scalar, packet_type>(const typename unpacket_traits<packet_type>::type * from, Index stride)\
|
pgather<scalar, packet_type>( \
|
||||||
{ \
|
const typename unpacket_traits<packet_type>::type* from, Index stride) { \
|
||||||
return get_base_packet<packet_type>::get_pgather(from, stride); \
|
return get_base_packet<packet_type>::get_pgather(from, stride); \
|
||||||
}
|
}
|
||||||
|
|
||||||
SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
|
SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
|
||||||
SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
|
SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
|
||||||
|
|
||||||
#undef SYCL_PGATHER_SPECILIZE
|
#undef SYCL_PGATHER_SPECILIZE
|
||||||
|
|
||||||
#define SYCL_PSCATTER(address_space_target)\
|
#define SYCL_PSCATTER(address_space_target) \
|
||||||
template<typename Scalar, typename packet_type> EIGEN_DEVICE_FUNC inline void pscatter(\
|
template <typename Scalar, typename packet_type> \
|
||||||
typename cl::sycl::multi_ptr<typename unpacket_traits<packet_type>::type,\
|
EIGEN_DEVICE_FUNC inline void pscatter( \
|
||||||
cl::sycl::access::address_space::address_space_target>::pointer_t to,\
|
typename cl::sycl::multi_ptr< \
|
||||||
const packet_type& from, Index stride) {\
|
typename unpacket_traits<packet_type>::type, \
|
||||||
get_base_packet<packet_type>::set_pscatter(to, from, stride);\
|
cl::sycl::access::address_space::address_space_target>::pointer_t \
|
||||||
}
|
to, \
|
||||||
|
const packet_type& from, Index stride) { \
|
||||||
|
get_base_packet<packet_type>::set_pscatter(to, from, stride); \
|
||||||
|
}
|
||||||
|
|
||||||
// global space
|
// global space
|
||||||
SYCL_PSCATTER(global_space)
|
SYCL_PSCATTER(global_space)
|
||||||
// local space
|
// local space
|
||||||
SYCL_PSCATTER(local_space)
|
SYCL_PSCATTER(local_space)
|
||||||
// private space
|
|
||||||
//SYCL_PSCATTER(private_space)
|
|
||||||
|
|
||||||
#undef SYCL_PSCATTER
|
#undef SYCL_PSCATTER
|
||||||
|
|
||||||
|
#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
|
||||||
|
template <> \
|
||||||
#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)\
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void \
|
typename unpacket_traits<packet_type>::type * to, \
|
||||||
pscatter<scalar, packet_type>(typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride)\
|
const packet_type& from, Index stride) { \
|
||||||
{ \
|
get_base_packet<packet_type>::set_pscatter(to, from, stride); \
|
||||||
get_base_packet<packet_type>::set_pscatter(to, from, stride);\
|
}
|
||||||
}
|
|
||||||
|
|
||||||
SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
|
SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
|
||||||
SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
|
SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
|
||||||
|
|
||||||
#undef SYCL_PSCATTER_SPECILIZE
|
#undef SYCL_PSCATTER_SPECILIZE
|
||||||
|
|
||||||
|
#define SYCL_PMAD(packet_type) \
|
||||||
#define SYCL_PMAD(packet_type)\
|
template <> \
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( const packet_type& a,\
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \
|
||||||
const packet_type& b, const packet_type& c){\
|
const packet_type& a, const packet_type& b, const packet_type& c) { \
|
||||||
return cl::sycl::mad(a,b,c);\
|
return cl::sycl::mad(a, b, c); \
|
||||||
}
|
}
|
||||||
|
|
||||||
SYCL_PMAD(cl::sycl::cl_float4)
|
SYCL_PMAD(cl::sycl::cl_float4)
|
||||||
SYCL_PMAD(cl::sycl::cl_double2)
|
SYCL_PMAD(cl::sycl::cl_double2)
|
||||||
#undef SYCL_PMAD
|
#undef SYCL_PMAD
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
const cl::sycl::cl_float4& a) {
|
||||||
return a.x();
|
return a.x();
|
||||||
}
|
}
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
|
||||||
|
const cl::sycl::cl_double2& a) {
|
||||||
return a.x();
|
return a.x();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
|
||||||
|
const cl::sycl::cl_float4& a) {
|
||||||
return a.x() + a.y() + a.z() + a.w();
|
return a.x() + a.y() + a.z() + a.w();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
|
||||||
|
const cl::sycl::cl_double2& a) {
|
||||||
return a.x() + a.y();
|
return a.x() + a.y();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
template <>
|
||||||
return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
|
||||||
|
const cl::sycl::cl_float4& a) {
|
||||||
|
return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
|
||||||
|
cl::sycl::fmax(a.z(), a.w()));
|
||||||
}
|
}
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
|
||||||
|
const cl::sycl::cl_double2& a) {
|
||||||
return cl::sycl::fmax(a.x(), a.y());
|
return cl::sycl::fmax(a.x(), a.y());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
template <>
|
||||||
return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
|
||||||
|
const cl::sycl::cl_float4& a) {
|
||||||
|
return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
|
||||||
|
cl::sycl::fmin(a.z(), a.w()));
|
||||||
}
|
}
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
|
||||||
|
const cl::sycl::cl_double2& a) {
|
||||||
return cl::sycl::fmin(a.x(), a.y());
|
return cl::sycl::fmin(a.x(), a.y());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
|
||||||
|
const cl::sycl::cl_float4& a) {
|
||||||
return a.x() * a.y() * a.z() * a.w();
|
return a.x() * a.y() * a.z() * a.w();
|
||||||
}
|
}
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
|
||||||
|
const cl::sycl::cl_double2& a) {
|
||||||
return a.x() * a.y();
|
return a.x() * a.y();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
template <>
|
||||||
return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
|
||||||
|
pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
|
||||||
|
return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
|
||||||
|
cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
|
||||||
}
|
}
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
|
||||||
|
pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
|
||||||
return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
|
return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
|
||||||
ptranspose(PacketBlock<cl::sycl::cl_float4,4>& kernel) {
|
PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
|
||||||
float tmp = kernel.packet[0].y();
|
float tmp = kernel.packet[0].y();
|
||||||
kernel.packet[0].y() = kernel.packet[1].x();
|
kernel.packet[0].y() = kernel.packet[1].x();
|
||||||
kernel.packet[1].x() = tmp;
|
kernel.packet[1].x() = tmp;
|
||||||
// std::swap(kernel.packet[0].y(), kernel.packet[1].x());
|
|
||||||
|
|
||||||
tmp = kernel.packet[0].z();
|
tmp = kernel.packet[0].z();
|
||||||
kernel.packet[0].z() = kernel.packet[2].x();
|
kernel.packet[0].z() = kernel.packet[2].x();
|
||||||
kernel.packet[2].x() = tmp;
|
kernel.packet[2].x() = tmp;
|
||||||
//std::swap(kernel.packet[0].z(), kernel.packet[2].x());
|
|
||||||
|
|
||||||
tmp = kernel.packet[0].w();
|
tmp = kernel.packet[0].w();
|
||||||
kernel.packet[0].w() = kernel.packet[3].x();
|
kernel.packet[0].w() = kernel.packet[3].x();
|
||||||
kernel.packet[3].x() = tmp;
|
kernel.packet[3].x() = tmp;
|
||||||
|
|
||||||
//std::swap(kernel.packet[0].w(), kernel.packet[3].x());
|
|
||||||
|
|
||||||
tmp = kernel.packet[1].z();
|
tmp = kernel.packet[1].z();
|
||||||
kernel.packet[1].z() = kernel.packet[2].y();
|
kernel.packet[1].z() = kernel.packet[2].y();
|
||||||
kernel.packet[2].y() = tmp;
|
kernel.packet[2].y() = tmp;
|
||||||
// std::swap(kernel.packet[1].z(), kernel.packet[2].y());
|
|
||||||
|
|
||||||
tmp = kernel.packet[1].w();
|
tmp = kernel.packet[1].w();
|
||||||
kernel.packet[1].w() = kernel.packet[3].y();
|
kernel.packet[1].w() = kernel.packet[3].y();
|
||||||
kernel.packet[3].y() = tmp;
|
kernel.packet[3].y() = tmp;
|
||||||
// std::swap(kernel.packet[1].w(), kernel.packet[3].y());
|
|
||||||
|
|
||||||
tmp = kernel.packet[2].w();
|
tmp = kernel.packet[2].w();
|
||||||
kernel.packet[2].w() = kernel.packet[3].z();
|
kernel.packet[2].w() = kernel.packet[3].z();
|
||||||
kernel.packet[3].z() = tmp;
|
kernel.packet[3].z() = tmp;
|
||||||
// std::swap(kernel.packet[2].w(), kernel.packet[3].z());
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
|
||||||
ptranspose(PacketBlock<cl::sycl::cl_double2,2>& kernel) {
|
PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
|
||||||
double tmp = kernel.packet[0].y();
|
double tmp = kernel.packet[0].y();
|
||||||
kernel.packet[0].y() = kernel.packet[1].x();
|
kernel.packet[0].y() = kernel.packet[1].x();
|
||||||
kernel.packet[1].x() = tmp;
|
kernel.packet[1].x() = tmp;
|
||||||
//std::swap(kernel.packet[0].y(), kernel.packet[1].x());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
|
||||||
pblend(const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
|
const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
|
||||||
const cl::sycl::cl_float4& thenPacket, const cl::sycl::cl_float4& elsePacket) {
|
const cl::sycl::cl_float4& thenPacket,
|
||||||
cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1,
|
const cl::sycl::cl_float4& elsePacket) {
|
||||||
ifPacket.select[1] ? 0 : -1,
|
cl::sycl::cl_int4 condition(
|
||||||
ifPacket.select[2] ? 0 : -1,
|
ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
|
||||||
ifPacket.select[3] ? 0 : -1);
|
ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
|
||||||
return cl::sycl::select(thenPacket, elsePacket, condition);
|
return cl::sycl::select(thenPacket, elsePacket, condition);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> inline cl::sycl::cl_double2
|
template <>
|
||||||
pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
|
inline cl::sycl::cl_double2 pblend(
|
||||||
const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
|
const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
|
||||||
|
const cl::sycl::cl_double2& thenPacket,
|
||||||
|
const cl::sycl::cl_double2& elsePacket) {
|
||||||
cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
|
cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
|
||||||
ifPacket.select[1] ? 0 : -1);
|
ifPacket.select[1] ? 0 : -1);
|
||||||
return cl::sycl::select(thenPacket, elsePacket, condition);
|
return cl::sycl::select(thenPacket, elsePacket, condition);
|
||||||
}
|
}
|
||||||
|
#endif // SYCL_DEVICE_ONLY
|
||||||
|
|
||||||
} // end namespace internal
|
#define SYCL_PSTORE(alignment) \
|
||||||
|
template <typename packet_type> \
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
|
||||||
|
const Eigen::TensorSycl::internal::RangeAccess< \
|
||||||
|
cl::sycl::access::mode::read_write, \
|
||||||
|
typename unpacket_traits<packet_type>::type>& to, \
|
||||||
|
const packet_type& from) { \
|
||||||
|
pstore##alignment(to.get_pointer(), from); \
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace Eigen
|
// global space
|
||||||
|
SYCL_PSTORE()
|
||||||
|
SYCL_PSTORE(u)
|
||||||
|
|
||||||
#endif // EIGEN_USE_SYCL
|
#undef SYCL_PSTORE
|
||||||
#endif // EIGEN_PACKET_MATH_SYCL_H
|
|
||||||
|
template <typename scalar, typename packet_type, int Alignment>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(
|
||||||
|
Eigen::TensorSycl::internal::RangeAccess<
|
||||||
|
cl::sycl::access::mode::read_write,
|
||||||
|
typename unpacket_traits<packet_type>::type>
|
||||||
|
to,
|
||||||
|
const packet_type& from) {
|
||||||
|
pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
#endif // EIGEN_PACKET_MATH_SYCL_H
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
* \brief:
|
* \brief:
|
||||||
* TypeCasting
|
* TypeCasting
|
||||||
*
|
*
|
||||||
*****************************************************************/
|
*****************************************************************/
|
||||||
|
|
||||||
#ifndef EIGEN_TYPE_CASTING_SYCL_H
|
#ifndef EIGEN_TYPE_CASTING_SYCL_H
|
||||||
#define EIGEN_TYPE_CASTING_SYCL_H
|
#define EIGEN_TYPE_CASTING_SYCL_H
|
||||||
@ -24,66 +24,62 @@
|
|||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
#ifdef __SYCL_DEVICE_ONLY__
|
#ifdef SYCL_DEVICE_ONLY
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<float, int> {
|
struct type_casting_traits<float, int> {
|
||||||
enum {
|
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
||||||
VectorizedCast = 1,
|
|
||||||
SrcCoeffRatio = 1,
|
|
||||||
TgtCoeffRatio = 1
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
|
template <>
|
||||||
return a. template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4
|
||||||
|
pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
|
||||||
|
return a
|
||||||
|
.template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<int, float> {
|
struct type_casting_traits<int, float> {
|
||||||
enum {
|
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
|
||||||
VectorizedCast = 1,
|
|
||||||
SrcCoeffRatio = 1,
|
|
||||||
TgtCoeffRatio = 1
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
|
template <>
|
||||||
return a. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
|
||||||
|
pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
|
||||||
|
return a.template convert<cl::sycl::cl_float,
|
||||||
|
cl::sycl::rounding_mode::automatic>();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<double, float> {
|
struct type_casting_traits<double, float> {
|
||||||
enum {
|
enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
|
||||||
VectorizedCast = 1,
|
|
||||||
SrcCoeffRatio = 2,
|
|
||||||
TgtCoeffRatio = 1
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
|
template <>
|
||||||
auto a1=a. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
|
||||||
auto b1=b. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
|
pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(
|
||||||
|
const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
|
||||||
|
auto a1 = a.template convert<cl::sycl::cl_float,
|
||||||
|
cl::sycl::rounding_mode::automatic>();
|
||||||
|
auto b1 = b.template convert<cl::sycl::cl_float,
|
||||||
|
cl::sycl::rounding_mode::automatic>();
|
||||||
return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());
|
return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<float, double> {
|
struct type_casting_traits<float, double> {
|
||||||
enum {
|
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
|
||||||
VectorizedCast = 1,
|
|
||||||
SrcCoeffRatio = 1,
|
|
||||||
TgtCoeffRatio = 2
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
|
||||||
|
pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
|
||||||
// Simply discard the second half of the input
|
// Simply discard the second half of the input
|
||||||
return cl::sycl::cl_double2(a.x(), a.y());
|
return cl::sycl::cl_double2(a.x(), a.y());
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_TYPE_CASTING_SYCL_H
|
#endif // EIGEN_TYPE_CASTING_SYCL_H
|
||||||
|
@ -762,7 +762,7 @@ template<typename Scalar> struct scalar_isnan_op {
|
|||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op)
|
||||||
typedef bool result_type;
|
typedef bool result_type;
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
return numext::isnan(a);
|
return numext::isnan(a);
|
||||||
#else
|
#else
|
||||||
return (numext::isnan)(a);
|
return (numext::isnan)(a);
|
||||||
@ -786,7 +786,7 @@ template<typename Scalar> struct scalar_isinf_op {
|
|||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op)
|
||||||
typedef bool result_type;
|
typedef bool result_type;
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
return numext::isinf(a);
|
return numext::isinf(a);
|
||||||
#else
|
#else
|
||||||
return (numext::isinf)(a);
|
return (numext::isinf)(a);
|
||||||
@ -810,7 +810,7 @@ template<typename Scalar> struct scalar_isfinite_op {
|
|||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op)
|
||||||
typedef bool result_type;
|
typedef bool result_type;
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const {
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
return numext::isfinite(a);
|
return numext::isfinite(a);
|
||||||
#else
|
#else
|
||||||
return (numext::isfinite)(a);
|
return (numext::isfinite)(a);
|
||||||
|
@ -497,6 +497,12 @@
|
|||||||
//
|
//
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
|
||||||
|
// EIGEN_USE_SYCL is a user-defined macro while __SYCL_DEVICE_ONLY__ is a compiler-defined macro.
|
||||||
|
// In most cases we want to check if both macros are defined which can be done using the define below.
|
||||||
|
#define SYCL_DEVICE_ONLY
|
||||||
|
#endif
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------------------
|
||||||
// Detect Compiler/Architecture/OS specific features
|
// Detect Compiler/Architecture/OS specific features
|
||||||
//------------------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------------------
|
||||||
@ -583,7 +589,7 @@
|
|||||||
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
|
((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \
|
||||||
|| (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
|
|| (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \
|
||||||
|| (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \
|
|| (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \
|
||||||
|| (EIGEN_COMP_MSVC >= 1900) || defined(__SYCL_DEVICE_ONLY__))
|
|| (EIGEN_COMP_MSVC >= 1900) || defined(SYCL_DEVICE_ONLY))
|
||||||
#define EIGEN_HAS_C99_MATH 1
|
#define EIGEN_HAS_C99_MATH 1
|
||||||
#else
|
#else
|
||||||
#define EIGEN_HAS_C99_MATH 0
|
#define EIGEN_HAS_C99_MATH 0
|
||||||
@ -639,7 +645,7 @@
|
|||||||
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
|
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
|
||||||
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
|
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
|
||||||
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
|
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
|
||||||
#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(__SYCL_DEVICE_ONLY__)
|
#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(SYCL_DEVICE_ONLY)
|
||||||
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
|
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
|
||||||
#else
|
#else
|
||||||
#define EIGEN_HAS_VARIADIC_TEMPLATES 0
|
#define EIGEN_HAS_VARIADIC_TEMPLATES 0
|
||||||
@ -791,7 +797,7 @@
|
|||||||
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
||||||
// : function body not available
|
// : function body not available
|
||||||
// See also bug 1367
|
// See also bug 1367
|
||||||
#if EIGEN_GNUC_AT_LEAST(4,2)
|
#if EIGEN_GNUC_AT_LEAST(4,2) && !defined(SYCL_DEVICE_ONLY)
|
||||||
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
|
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
|
||||||
#else
|
#else
|
||||||
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
|
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
|
||||||
@ -814,7 +820,7 @@
|
|||||||
// GPU stuff
|
// GPU stuff
|
||||||
|
|
||||||
// Disable some features when compiling with GPU compilers (NVCC/clang-cuda/SYCL/HIPCC)
|
// Disable some features when compiling with GPU compilers (NVCC/clang-cuda/SYCL/HIPCC)
|
||||||
#if defined(EIGEN_CUDACC) || defined(__SYCL_DEVICE_ONLY__) || defined(EIGEN_HIPCC)
|
#if defined(EIGEN_CUDACC) || defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIPCC)
|
||||||
// Do not try asserts on device code
|
// Do not try asserts on device code
|
||||||
#ifndef EIGEN_NO_DEBUG
|
#ifndef EIGEN_NO_DEBUG
|
||||||
#define EIGEN_NO_DEBUG
|
#define EIGEN_NO_DEBUG
|
||||||
@ -829,9 +835,14 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
|
#ifndef EIGEN_DONT_VECTORIZE
|
||||||
|
#define EIGEN_DONT_VECTORIZE
|
||||||
|
#endif
|
||||||
|
#define EIGEN_DEVICE_FUNC __attribute__((always_inline))
|
||||||
// All functions callable from CUDA/HIP code must be qualified with __device__
|
// All functions callable from CUDA/HIP code must be qualified with __device__
|
||||||
#ifdef EIGEN_GPUCC
|
#elif defined(EIGEN_GPUCC)
|
||||||
#define EIGEN_DEVICE_FUNC __host__ __device__
|
#define EIGEN_DEVICE_FUNC __host__ __device__
|
||||||
#else
|
#else
|
||||||
#define EIGEN_DEVICE_FUNC
|
#define EIGEN_DEVICE_FUNC
|
||||||
#endif
|
#endif
|
||||||
@ -852,8 +863,12 @@
|
|||||||
|
|
||||||
// eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89
|
// eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89
|
||||||
#ifdef EIGEN_NO_DEBUG
|
#ifdef EIGEN_NO_DEBUG
|
||||||
#define eigen_plain_assert(x)
|
#ifdef SYCL_DEVICE_ONLY // used to silence the warning on SYCL device
|
||||||
#else
|
#define eigen_plain_assert(x) EIGEN_UNUSED_VARIABLE(x)
|
||||||
|
#else
|
||||||
|
#define eigen_plain_assert(x)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
#if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
|
#if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
@ -1211,7 +1226,7 @@ bool all(T t, Ts ... ts){ return t && all(ts...); }
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Wrapping #pragma unroll in a macro since it is required for SYCL
|
// Wrapping #pragma unroll in a macro since it is required for SYCL
|
||||||
#if defined(__SYCL_DEVICE_ONLY__)
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#define EIGEN_UNROLL_LOOP __pragma(unroll)
|
#define EIGEN_UNROLL_LOOP __pragma(unroll)
|
||||||
#else
|
#else
|
||||||
|
@ -70,7 +70,7 @@
|
|||||||
// protected by parenthesis against macro expansion, the min()/max() macros
|
// protected by parenthesis against macro expansion, the min()/max() macros
|
||||||
// are defined here and any not-parenthesized min/max call will cause a
|
// are defined here and any not-parenthesized min/max call will cause a
|
||||||
// compiler error.
|
// compiler error.
|
||||||
#if !defined(__HIPCC__)
|
#if !defined(__HIPCC__) && !defined(EIGEN_USE_SYCL)
|
||||||
//
|
//
|
||||||
// HIP header files include the following files
|
// HIP header files include the following files
|
||||||
// <thread>
|
// <thread>
|
||||||
@ -277,7 +277,7 @@ namespace Eigen
|
|||||||
}
|
}
|
||||||
#endif //EIGEN_EXCEPTIONS
|
#endif //EIGEN_EXCEPTIONS
|
||||||
|
|
||||||
#elif !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__) // EIGEN_DEBUG_ASSERTS
|
#elif !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(SYCL_DEVICE_ONLY) // EIGEN_DEBUG_ASSERTS
|
||||||
// see bug 89. The copy_bool here is working around a bug in gcc <= 4.3
|
// see bug 89. The copy_bool here is working around a bug in gcc <= 4.3
|
||||||
#define eigen_assert(a) \
|
#define eigen_assert(a) \
|
||||||
if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\
|
if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\
|
||||||
@ -334,7 +334,7 @@ namespace Eigen
|
|||||||
std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n";
|
std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__)
|
#if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(SYCL_DEVICE_ONLY)
|
||||||
#define EIGEN_USE_CUSTOM_ASSERT
|
#define EIGEN_USE_CUSTOM_ASSERT
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user