From c3f67063ed1e7e2795bfa5fc0b1cb031f3020b62 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 22 Feb 2023 17:44:53 +0000 Subject: [PATCH] [SYCL-2020]- null placeholder accessor issue in Reduction SYCL test --- .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 19 +++++++++++++- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 26 ++++++++++++++++--- unsupported/test/cxx11_tensor_device_sycl.cpp | 6 +++-- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index 8fdc8ba5f..f3141dbf4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -794,6 +794,17 @@ class QueueInterface { #endif } + template + EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess get_null_accessor() + const { + eigen_assert(null_buff_simulator.get_size() % sizeof(T) == 0 && "The null buffer size must be a multiple of sizeof(T)"); + const ptrdiff_t typed_size = null_buff_simulator.get_size() / sizeof(T); + eigen_assert(typed_size > 0); + auto typed_null_buff = + null_buff_simulator.template reinterpret(cl::sycl::range<1>(typed_size)); + return TensorSycl::internal::RangeAccess(typed_null_buff); + } + protected: EIGEN_STRONG_INLINE void set_latest_event(cl::sycl::event e) const { #ifdef EIGEN_SYCL_STORE_LATEST_EVENT @@ -852,6 +863,7 @@ class QueueInterface { /// SyclDevice. If a non-read-only pointer is needed to be accessed on the /// host we should manually deallocate it. mutable TensorSycl::internal::PointerMapper pMapper; + cl::sycl::buffer null_buff_simulator = cl::sycl::buffer(cl::sycl::range<1>(128)); #ifndef EIGEN_SYCL_NO_REUSE_BUFFERS mutable std::unordered_set scratch_buffers; #endif @@ -884,7 +896,12 @@ struct SyclDeviceBase { struct SyclDevice : public SyclDeviceBase { explicit SyclDevice(const QueueInterface *queue_stream) : SyclDeviceBase(queue_stream) {} - + + template + EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess + get_null_accessor() const { + return queue_stream()->template get_null_accessor(); + } // this is the accessor used to construct the evaluator template EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index ae03ba52d..fdb473336 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -541,6 +541,24 @@ class TensorReductionOp : public TensorBase struct TensorReductionEvaluatorBase; +namespace internal { +namespace reduction { + +template +EIGEN_ALWAYS_INLINE typename StorageMemory::Type get_null_value( + typename std::enable_if::value, const Device>::type& dev) { + return (dev.template get_null_accessor()); +} + +template +EIGEN_ALWAYS_INLINE typename StorageMemory::Type get_null_value( + typename std::enable_if::value, const Device>::type&) { + return NULL; +} + +}// end namespace reduction +} // end namespace internal + // Eval as rvalue template class MakePointer_, typename Device> struct TensorReductionEvaluatorBase, Device> @@ -603,8 +621,10 @@ static constexpr bool RunningOnGPU = false; static constexpr bool RunningFullReduction = (NumOutputDims==0); EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) - { + : m_impl(op.expression(), device), + m_reducer(op.reducer()), + m_result(internal::reduction::get_null_value(device)), + m_device(device) { EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), YOU_MADE_A_PROGRAMMING_MISTAKE); @@ -895,7 +915,7 @@ static constexpr bool RunningOnGPU = false; // binding placeholder accessors to a command group handler for SYCL EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { m_impl.bind(cgh); - if(m_result) m_result.bind(cgh); + m_result.bind(cgh); } #endif diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp index 74e902645..7ba104b03 100644 --- a/unsupported/test/cxx11_tensor_device_sycl.cpp +++ b/unsupported/test/cxx11_tensor_device_sycl.cpp @@ -26,8 +26,10 @@ #ifdef SYCL_COMPILER_IS_DPCPP template struct cl::sycl::is_device_copyable< - const OffByOneScalar, - std::enable_if_t>::value>> : std::true_type {}; + OffByOneScalar, + std::enable_if_t>::value && + (std::is_const_v> || std::is_volatile_v>))>> + : std::true_type {}; #endif template