mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-20 21:33:14 +08:00
Applying Benoit's comment. Embedding synchronisation inside device memcpy so there is no need to externally call synchronise() for device memcopy.
This commit is contained in:
parent
e46e722381
commit
c6f7b33834
@ -234,7 +234,7 @@ struct SyclDevice {
|
|||||||
auto dst_acc =it2->second.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
|
auto dst_acc =it2->second.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
|
||||||
cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor<T>(src_acc, dst_acc, rng, i, offset));
|
cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor<T>(src_acc, dst_acc, rng, i, offset));
|
||||||
});
|
});
|
||||||
asynchronousExec();
|
synchronize();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The memcpyHostToDevice is used to copy the device only pointer to a host pointer. Using the device
|
/// The memcpyHostToDevice is used to copy the device only pointer to a host pointer. Using the device
|
||||||
@ -265,7 +265,7 @@ struct SyclDevice {
|
|||||||
auto dst_acc =dest_buf.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
|
auto dst_acc =dest_buf.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
|
||||||
cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor<T>(src_acc, dst_acc, rng, 0, offset));
|
cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor<T>(src_acc, dst_acc, rng, 0, offset));
|
||||||
});
|
});
|
||||||
asynchronousExec();
|
synchronize();
|
||||||
}
|
}
|
||||||
/// returning the sycl queue
|
/// returning the sycl queue
|
||||||
EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;}
|
EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;}
|
||||||
@ -308,7 +308,7 @@ struct SyclDevice {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void asynchronousExec() const {
|
EIGEN_STRONG_INLINE void asynchronousExec() const {
|
||||||
///FIXEDME:: currently there is a race condition regarding the asynch scheduler.
|
///FIXEDME:: currently there is a race condition regarding the asynch scheduler.
|
||||||
//sycl_queue().throw_asynchronous();// does not pass. Temporarily disabled
|
//sycl_queue().throw_asynchronous();// does not pass. Temporarily disabled
|
||||||
sycl_queue().wait_and_throw(); //pass
|
sycl_queue().wait_and_throw(); //pass
|
||||||
|
|
||||||
|
@ -143,12 +143,12 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
|
|||||||
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
|
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
CoeffReturnType* data() const { return m_buffer; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return m_buffer; }
|
||||||
|
|
||||||
/// required by sycl in order to extract the sycl accessor
|
/// required by sycl in order to extract the sycl accessor
|
||||||
const TensorEvaluator<ArgType, Device>& impl() { return m_impl; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() { return m_impl; }
|
||||||
/// used by sycl in order to build the sycl buffer
|
/// used by sycl in order to build the sycl buffer
|
||||||
const Device& device() const{return m_device;}
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const{return m_device;}
|
||||||
private:
|
private:
|
||||||
TensorEvaluator<ArgType, Device> m_impl;
|
TensorEvaluator<ArgType, Device> m_impl;
|
||||||
const ArgType m_op;
|
const ArgType m_op;
|
||||||
|
@ -736,22 +736,12 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
|
|||||||
for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
|
for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
|
||||||
eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
|
eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
|
||||||
if(m_strides[i]>0){
|
if(m_strides[i]>0){
|
||||||
#ifndef __SYCL_DEVICE_ONLY__
|
|
||||||
startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
|
startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
|
||||||
stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
|
stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
|
||||||
#else
|
|
||||||
startIndicesClamped[i] = cl::sycl::clamp(static_cast<Index>(op.startIndices()[i]), static_cast<Index>(0), static_cast<Index>(m_impl.dimensions()[i]));
|
|
||||||
stopIndicesClamped[i] = cl::sycl::clamp(static_cast<Index>(op.stopIndices()[i]), static_cast<Index>(0), static_cast<Index>(m_impl.dimensions()[i]));
|
|
||||||
#endif
|
|
||||||
}else{
|
}else{
|
||||||
/* implies m_strides[i]<0 by assert */
|
/* implies m_strides[i]<0 by assert */
|
||||||
#ifndef __SYCL_DEVICE_ONLY__
|
|
||||||
startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
|
startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
|
||||||
stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
|
stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
|
||||||
#else
|
|
||||||
startIndicesClamped[i] = cl::sycl::clamp(static_cast<Index>(op.startIndices()[i]), static_cast<Index>(-1), static_cast<Index>(m_impl.dimensions()[i] - 1));
|
|
||||||
stopIndicesClamped[i] = cl::sycl::clamp(static_cast<Index>(op.stopIndices()[i]), static_cast<Index>(-1), static_cast<Index>(m_impl.dimensions()[i] - 1));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
m_startIndices[i] = startIndicesClamped[i];
|
m_startIndices[i] = startIndicesClamped[i];
|
||||||
}
|
}
|
||||||
@ -867,7 +857,11 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
|
static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
|
||||||
|
#ifndef __SYCL_DEVICE_ONLY__
|
||||||
return numext::maxi(min, numext::mini(max,value));
|
return numext::maxi(min, numext::mini(max,value));
|
||||||
|
#else
|
||||||
|
return cl::sycl::clamp(value, min, max);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
array<Index, NumDims> m_outputStrides;
|
array<Index, NumDims> m_outputStrides;
|
||||||
|
@ -121,11 +121,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
|||||||
{
|
{
|
||||||
m_dimensions = m_impl.dimensions();
|
m_dimensions = m_impl.dimensions();
|
||||||
for (int i = 0; i < NumDims; ++i) {
|
for (int i = 0; i < NumDims; ++i) {
|
||||||
#ifndef __SYCL_DEVICE_ONLY__
|
m_dimensions[i] =Eigen::numext::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
|
||||||
m_dimensions[i] = ceilf(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
|
|
||||||
#else
|
|
||||||
m_dimensions[i] = cl::sycl::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||||
@ -233,8 +229,6 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
|||||||
/// required by sycl in order to extract the accessor
|
/// required by sycl in order to extract the accessor
|
||||||
Strides functor() const { return m_strides; }
|
Strides functor() const { return m_strides; }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
|
||||||
{
|
{
|
||||||
@ -264,7 +258,6 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
|||||||
const Strides m_strides;
|
const Strides m_strides;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// Eval as lvalue
|
// Eval as lvalue
|
||||||
template<typename Strides, typename ArgType, typename Device>
|
template<typename Strides, typename ArgType, typename Device>
|
||||||
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
|
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
|
||||||
@ -299,10 +292,9 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// required by sycl in order to extract the accessor
|
/// required by sycl in order to extract the accessor
|
||||||
const TensorEvaluator<ArgType, Device>& impl() const { return this->m_impl; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return this->m_impl; }
|
||||||
/// required by sycl in order to extract the accessor
|
/// required by sycl in order to extract the accessor
|
||||||
Strides functor() const { return this->m_strides; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Strides functor() const { return this->m_strides; }
|
||||||
|
|
||||||
|
|
||||||
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
void writePacket(Index index, const PacketReturnType& x)
|
void writePacket(Index index, const PacketReturnType& x)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user