[SYCL-2020]- null placeholder accessor issue in Reduction SYCL test

This commit is contained in:
Mehdi Goli 2023-02-22 17:44:53 +00:00 committed by Rasmus Munk Larsen
parent 6bcd941ee3
commit c3f67063ed
3 changed files with 45 additions and 6 deletions

View File

@ -794,6 +794,17 @@ class QueueInterface {
#endif #endif
} }
/// Builds a read_write RangeAccess of element type T on top of the queue's
/// byte-typed `null_buff_simulator` buffer.
///
/// The byte buffer is reinterpreted as a 1-D buffer of T holding
/// `get_size() / sizeof(T)` elements. Two preconditions are checked with
/// eigen_assert: the byte size must be an exact multiple of sizeof(T), and the
/// resulting element count must be positive.
///
/// NOTE(review): presumably this accessor stands in for a "null" result
/// pointer on SYCL devices, where a placeholder accessor still needs a backing
/// buffer - confirm against the callers.
///
/// \tparam T element type the placeholder buffer is viewed as.
/// \return a RangeAccess<read_write, T> wrapping the reinterpreted buffer.
template <typename T>
EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T> get_null_accessor()
    const {
  using null_access_t = TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>;

  // Size of the backing buffer in bytes (it is declared with uint8_t elements).
  const auto byte_count = null_buff_simulator.get_size();
  eigen_assert(byte_count % sizeof(T) == 0 && "The null buffer size must be a multiple of sizeof(T)");

  // Number of T elements that fit in the backing buffer.
  const ptrdiff_t element_count = byte_count / sizeof(T);
  eigen_assert(element_count > 0);

  // View the same storage as a buffer of T and wrap it for the caller.
  auto reinterpreted_buff = null_buff_simulator.template reinterpret<T>(cl::sycl::range<1>(element_count));
  return null_access_t(reinterpreted_buff);
}
protected: protected:
EIGEN_STRONG_INLINE void set_latest_event(cl::sycl::event e) const { EIGEN_STRONG_INLINE void set_latest_event(cl::sycl::event e) const {
#ifdef EIGEN_SYCL_STORE_LATEST_EVENT #ifdef EIGEN_SYCL_STORE_LATEST_EVENT
@ -852,6 +863,7 @@ class QueueInterface {
/// SyclDevice. If a non-read-only pointer is needed to be accessed on the /// SyclDevice. If a non-read-only pointer is needed to be accessed on the
/// host we should manually deallocate it. /// host we should manually deallocate it.
mutable TensorSycl::internal::PointerMapper pMapper; mutable TensorSycl::internal::PointerMapper pMapper;
cl::sycl::buffer<uint8_t, 1> null_buff_simulator = cl::sycl::buffer<uint8_t, 1>(cl::sycl::range<1>(128));
#ifndef EIGEN_SYCL_NO_REUSE_BUFFERS #ifndef EIGEN_SYCL_NO_REUSE_BUFFERS
mutable std::unordered_set<void *> scratch_buffers; mutable std::unordered_set<void *> scratch_buffers;
#endif #endif
@ -885,6 +897,11 @@ struct SyclDevice : public SyclDeviceBase {
explicit SyclDevice(const QueueInterface *queue_stream) explicit SyclDevice(const QueueInterface *queue_stream)
: SyclDeviceBase(queue_stream) {} : SyclDeviceBase(queue_stream) {}
/// Returns the placeholder accessor for element type scalar_t by delegating
/// to get_null_accessor<scalar_t>() on the queue interface this device wraps.
///
/// \tparam scalar_t element type of the returned RangeAccess.
/// \return whatever the queue interface's get_null_accessor<scalar_t>() yields.
template <typename scalar_t>
EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, scalar_t>
get_null_accessor() const {
  const auto &queue = *queue_stream();
  return queue.template get_null_accessor<scalar_t>();
}
// this is the accessor used to construct the evaluator // this is the accessor used to construct the evaluator
template <cl::sycl::access::mode AcMd, typename T> template <cl::sycl::access::mode AcMd, typename T>
EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess<AcMd, T> EIGEN_STRONG_INLINE TensorSycl::internal::RangeAccess<AcMd, T>

View File

@ -541,6 +541,24 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType,
template<typename ArgType, typename Device> template<typename ArgType, typename Device>
struct TensorReductionEvaluatorBase; struct TensorReductionEvaluatorBase;
namespace internal {
namespace reduction {

/// Device-dependent "null" value for a reduction result of storage type
/// StorageMemory<CoeffReturnType, Device>::Type.
///
/// Overload selected when Device is NOT Eigen::SyclDevice: the device argument
/// is ignored and NULL is returned.
///
/// Both template arguments must be supplied explicitly by the caller, because
/// the parameter type goes through std::enable_if and therefore cannot be
/// deduced.
template <typename CoeffReturnType, typename Device>
EIGEN_ALWAYS_INLINE typename StorageMemory<CoeffReturnType, Device>::Type get_null_value(
    typename std::enable_if<!Eigen::internal::is_same<Device, Eigen::SyclDevice>::value, const Device>::type&) {
  return NULL;
}

/// Overload selected when Device IS Eigen::SyclDevice: returns the device's
/// get_null_accessor<CoeffReturnType>() instead of a null pointer.
template <typename CoeffReturnType, typename Device>
EIGEN_ALWAYS_INLINE typename StorageMemory<CoeffReturnType, Device>::Type get_null_value(
    typename std::enable_if<Eigen::internal::is_same<Device, Eigen::SyclDevice>::value, const Device>::type&
        sycl_device) {
  return sycl_device.template get_null_accessor<CoeffReturnType>();
}

}  // end namespace reduction
}  // end namespace internal
// Eval as rvalue // Eval as rvalue
template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
@ -603,8 +621,10 @@ static constexpr bool RunningOnGPU = false;
static constexpr bool RunningFullReduction = (NumOutputDims==0); static constexpr bool RunningFullReduction = (NumOutputDims==0);
EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device) EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) : m_impl(op.expression(), device),
{ m_reducer(op.reducer()),
m_result(internal::reduction::get_null_value<CoeffReturnType, Device>(device)),
m_device(device) {
EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
YOU_MADE_A_PROGRAMMING_MISTAKE); YOU_MADE_A_PROGRAMMING_MISTAKE);
@ -895,7 +915,7 @@ static constexpr bool RunningOnGPU = false;
// binding placeholder accessors to a command group handler for SYCL // binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh); m_impl.bind(cgh);
if(m_result) m_result.bind(cgh); m_result.bind(cgh);
} }
#endif #endif

View File

@ -26,8 +26,10 @@
#ifdef SYCL_COMPILER_IS_DPCPP #ifdef SYCL_COMPILER_IS_DPCPP
template <typename T> template <typename T>
struct cl::sycl::is_device_copyable< struct cl::sycl::is_device_copyable<
const OffByOneScalar<T>, OffByOneScalar<T>,
std::enable_if_t<!std::is_trivially_copyable<const OffByOneScalar<T>>::value>> : std::true_type {}; std::enable_if_t<!(!std::is_trivially_copyable<OffByOneScalar<T>>::value &&
(std::is_const_v<OffByOneScalar<T>> || std::is_volatile_v<OffByOneScalar<T>>))>>
: std::true_type {};
#endif #endif
template <typename DataType, int DataLayout, typename IndexType> template <typename DataType, int DataLayout, typename IndexType>