Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-06-22 08:52:15 +08:00)

Merge commit f95e3b84a5
@@ -826,7 +826,7 @@ template<typename T> T generic_fast_tanh_float(const T& a_x);
 
 namespace numext {
 
-#ifndef __CUDA_ARCH__
+#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
 template<typename T>
 EIGEN_DEVICE_FUNC
 EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
@@ -842,6 +842,84 @@ EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
   EIGEN_USING_STD_MATH(max);
   return max EIGEN_NOT_A_MACRO (x,y);
 }
 
+#elif defined(__SYCL_DEVICE_ONLY__)
+template<typename T>
+EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y)
+{
+  return y < x ? y : x;
+}
+
+template<typename T>
+EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y)
+{
+  return x < y ? y : x;
+}
+
+EIGEN_ALWAYS_INLINE int mini(const int& x, const int& y)
+{
+  return cl::sycl::min(x,y);
+}
+
+EIGEN_ALWAYS_INLINE int maxi(const int& x, const int& y)
+{
+  return cl::sycl::max(x,y);
+}
+
+EIGEN_ALWAYS_INLINE unsigned int mini(const unsigned int& x, const unsigned int& y)
+{
+  return cl::sycl::min(x,y);
+}
+
+EIGEN_ALWAYS_INLINE unsigned int maxi(const unsigned int& x, const unsigned int& y)
+{
+  return cl::sycl::max(x,y);
+}
+
+EIGEN_ALWAYS_INLINE long mini(const long& x, const long& y)
+{
+  return cl::sycl::min(x,y);
+}
+
+EIGEN_ALWAYS_INLINE long maxi(const long& x, const long& y)
+{
+  return cl::sycl::max(x,y);
+}
+
+EIGEN_ALWAYS_INLINE unsigned long mini(const unsigned long& x, const unsigned long& y)
+{
+  return cl::sycl::min(x,y);
+}
+
+EIGEN_ALWAYS_INLINE unsigned long maxi(const unsigned long& x, const unsigned long& y)
+{
+  return cl::sycl::max(x,y);
+}
+
+EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y)
+{
+  return cl::sycl::fmin(x,y);
+}
+
+EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y)
+{
+  return cl::sycl::fmax(x,y);
+}
+
+EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y)
+{
+  return cl::sycl::fmin(x,y);
+}
+
+EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y)
+{
+  return cl::sycl::fmax(x,y);
+}
+
 #else
 template<typename T>
 EIGEN_DEVICE_FUNC
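Note on the hunk above: std::min/std::max are not usable inside SYCL kernels, so numext::mini/maxi dispatch to the cl::sycl built-ins (min/max for the integer overloads, fmin/fmax for float and double) and to a plain ternary for generic types. A minimal host-compilable sketch of the generic fallback, with invented names:

    #include <cassert>

    // Sketch of the generic fallback overloads added above; the comparison
    // direction (y < x vs. x < y) mirrors std::min/std::max exactly.
    template <typename T>
    inline T mini_sketch(const T& x, const T& y) { return y < x ? y : x; }

    template <typename T>
    inline T maxi_sketch(const T& x, const T& y) { return x < y ? y : x; }

    int main() {
      assert(mini_sketch(3, 5) == 3);
      assert(maxi_sketch(3.0, 5.0) == 5.0);
      return 0;
    }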
@@ -45,7 +45,7 @@ struct DefaultDevice {
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
-#ifndef __CUDA_ARCH__
+#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
     // Running on the host CPU
     return l1CacheSize();
 #else
@@ -55,7 +55,7 @@ struct DefaultDevice {
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
-#ifndef __CUDA_ARCH__
+#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
     // Running single threaded on the host CPU
     return l3CacheSize();
 #else
@@ -17,6 +17,32 @@
 
 namespace Eigen {
 
+#define ConvertToActualTypeSycl(Scalar, buf_acc) reinterpret_cast<typename cl::sycl::global_ptr<Scalar>::pointer_t>((&(*buf_acc.get_pointer())))
+
+template <typename Scalar> class MemCopyFunctor {
+ public:
+  typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read, cl::sycl::access::target::global_buffer> read_accessor;
+  typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer> write_accessor;
+
+  MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {}
+
+  void operator()(cl::sycl::nd_item<1> itemID) {
+    auto src_ptr = ConvertToActualTypeSycl(Scalar, m_src_acc);
+    auto dst_ptr = ConvertToActualTypeSycl(Scalar, m_dst_acc);
+    auto globalid = itemID.get_global_linear_id();
+    if (globalid < m_rng) {
+      dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset];
+    }
+  }
+
+ private:
+  read_accessor m_src_acc;
+  write_accessor m_dst_acc;
+  size_t m_rng;
+  size_t m_i;
+  size_t m_offset;
+};
+
 EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){
   auto devices = cl::sycl::device::get_devices();
   std::vector<cl::sycl::device>::iterator it =devices.begin();
@@ -33,7 +59,6 @@ EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device
   }
   return devices;
 }
-#define ConvertToActualTypeSycl(T, buf_acc) reinterpret_cast<typename cl::sycl::global_ptr<T>::pointer_t>((&(*buf_acc.get_pointer())))
 
 struct QueueInterface {
   /// class members:
@@ -170,30 +195,6 @@ struct SyclDevice {
   // some runtime conditions that can be applied here
   EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; }
 
-  template <typename T> class MemCopyFunctor {
-  public:
-    typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read, cl::sycl::access::target::global_buffer> read_accessor;
-    typedef cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer> write_accessor;
-
-    MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {}
-
-    void operator()(cl::sycl::nd_item<1> itemID) {
-      auto src_ptr = ConvertToActualTypeSycl(T, m_src_acc);
-      auto dst_ptr = ConvertToActualTypeSycl(T, m_dst_acc);
-      auto globalid = itemID.get_global_linear_id();
-      if (globalid < m_rng) {
-        dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset];
-      }
-    }
-
-  private:
-    read_accessor m_src_acc;
-    write_accessor m_dst_acc;
-    size_t m_rng;
-    size_t m_i;
-    size_t m_offset;
-  };
-
   /// the memcpy function
   template<typename T> EIGEN_STRONG_INLINE void memcpy(void *dst, const T *src, size_t n) const {
     auto it1 = m_queue_stream->find_buffer((void*)src);
@@ -260,6 +261,17 @@ struct SyclDevice {
     });
     synchronize();
   }
 
+  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+    // FIXME
+    return 48*1024;
+  }
+
+  EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+    // We won't try to take advantage of the l2 cache for the time being, and
+    // there is no l3 cache on cuda devices.
+    return firstLevelCacheSize();
+  }
   /// No need for sycl it should act the same as CPU version
   EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; }
 
@@ -124,7 +124,7 @@ namespace {
 template <typename T>
 struct DividerHelper<64, T> {
   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
-#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__)
+#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__)
     return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
 #else
     const uint64_t shift = 1ULL << log_div;
@@ -723,7 +723,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   };
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-    : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
+    : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()), m_exprStartIndices(op.startIndices()), m_exprStopIndices(op.stopIndices())
   {
     // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
     DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
@@ -828,6 +828,15 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
     return NULL;
   }
 
+  // used by sycl
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStartIndices() const { return m_exprStartIndices; }
+  // used by sycl
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StopIndices& exprStopIndices() const { return m_exprStopIndices; }
+  // used by sycl
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Strides& strides() const { return m_strides; }
+  /// used by sycl
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+
 protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
   {
@@ -862,6 +871,10 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
   const Strides m_strides;
   std::size_t m_block_total_size_max;
+  // used by sycl
+  const StartIndices m_exprStartIndices;
+  // used by sycl
+  const StopIndices m_exprStopIndices;
 };
 
 // Eval as lvalue
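The accessors added in the two hunks above exist so the SYCL functor extractor can rebuild a TensorStridingSlicingOp on the device from its start/stop/stride triple. For reference, a host-side sketch of the expression they describe (sizes are arbitrary):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> t(7, 11);
      t.setRandom();
      Eigen::DSizes<Eigen::DenseIndex, 2> start(3, 4), stop(5, 7), stride(1, 1);
      // A 2x3 view covering rows [3,5) and columns [4,7) of t.
      Eigen::Tensor<float, 2> s = t.stridedSlice(start, stop, stride);
      return (s.dimension(0) == 2 && s.dimension(1) == 3) ? 0 : 1;
    }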
@@ -200,6 +200,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
 
   EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
 
+  /// used by sycl
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PaddingDimensions& padding() const { return m_padding; }
+  /// used by sycl
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& padding_value() const { return m_paddingValue; }
+  /// used by sycl
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+
 private:
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
       Index index, int dim_index) const {
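padding() and padding_value() expose the two parameters a TensorPaddingOp carries, which is all the SYCL backend needs to reconstruct the node on the device. A host-side usage sketch of the operation (values invented):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> t(2, 3);
      t.setConstant(1.0f);
      // Pad one row before/after dim 0 and two columns after dim 1.
      Eigen::array<std::pair<Eigen::DenseIndex, Eigen::DenseIndex>, 2>
          paddings{{{1, 1}, {0, 2}}};
      Eigen::Tensor<float, 2> padded = t.pad(paddings);  // 4 x 5, zeros outside t
      return (padded.dimension(0) == 4 && padded.dimension(1) == 5) ? 0 : 1;
    }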
@@ -200,9 +200,6 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> {
   /// creating the shared memory for calculating reduction.
   /// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can
   /// recursively apply reduction on it in order to reduce the whole.
-  // Dims dims= self.xprDims();
-  //Op functor = reducer;
-
   dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange);
   dev.sycl_queue().submit([&](cl::sycl::handler &cgh) {
     // create a tuple of accessors from Evaluator
@@ -214,28 +211,6 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> {
       TensorSycl::internal::ReductionFunctor<HostExpr, PlaceHolderExpr, FunctorExpr, Tuple_of_Acc, Dims, Op, typename Self::Index>
         (output_accessor, functors, tuple_of_accessors, self.xprDims(), reducer, range));
-
-
-    // [=](cl::sycl::nd_item<1> itemID) {
-    //   typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
-    //   auto device_expr = TensorSycl::internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
-    /// reduction cannot be captured automatically through our device conversion recursion. The reason is that reduction has two behaviour
-    /// the first behaviour is when it is used as a root to lauch the sub-kernel. The second one is when it is treated as a leafnode to pass the
-    /// calculated result to its parent kernel. While the latter is automatically detected through our device expression generator. The former is created here.
-    //   const auto device_self_expr= TensorReductionOp<Op, Dims, decltype(device_expr.expr) ,MakeGlobalPointer>(device_expr.expr, dims, functor);
-    /// This is the evaluator for device_self_expr. This is exactly similar to the self which has been passed to run function. The difference is
-    /// the device_evaluator is detectable and recognisable on the device.
-    //   typedef Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice> DeviceSelf;
-    //   auto device_self_evaluator = Eigen::TensorEvaluator<decltype(device_self_expr), Eigen::DefaultDevice>(device_self_expr, Eigen::DefaultDevice());
-    //   auto output_accessor_ptr =ConvertToActualTypeSycl(typename DeviceSelf::CoeffReturnType, output_accessor);
-    /// const cast added as a naive solution to solve the qualifier drop error
-    //   auto globalid=itemID.get_global_linear_id();
-    //   if (globalid< range) {
-    //     typename DeviceSelf::CoeffReturnType accum = functor.initialize();
-    //     GenericDimReducer<DeviceSelf::NumReducedDims-1, DeviceSelf, Op>::reduce(device_self_evaluator, device_self_evaluator.firstInput(static_cast<typename DevExpr::Index>(globalid)),const_cast<Op&>(functor), &accum);
-    //     functor.finalize(accum);
-    //     output_accessor_ptr[globalid]= accum;
-    //   }
-    // });
   });
   dev.synchronize();
   return false;
@@ -125,18 +125,31 @@ KERNELBROKERCONVERTSLICEOP()
 #undef KERNELBROKERCONVERTSLICEOP
 
 
-#define KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(OPEXPR, CVQual)\
+#define KERNELBROKERCONVERTERSLICESTRIDEOP(CVQual)\
+template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>\
+struct ConvertToDeviceExpression<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >{\
+  typedef CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, typename ConvertToDeviceExpression<XprType>::Type> Type;\
+};
+
+KERNELBROKERCONVERTERSLICESTRIDEOP(const)
+KERNELBROKERCONVERTERSLICESTRIDEOP()
+#undef KERNELBROKERCONVERTERSLICESTRIDEOP
+
+#define KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(OPEXPR, CVQual)\
 template<typename Param, typename XprType>\
 struct ConvertToDeviceExpression<CVQual OPEXPR <Param, XprType> >{\
   typedef CVQual OPEXPR<Param, typename ConvertToDeviceExpression<XprType>::Type> Type;\
 };
 
-KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, const)
-KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, )
+KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorPaddingOp, const)
+KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorPaddingOp, )
 
-KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, const)
-KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, )
-#undef KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP
+KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorReshapingOp, const)
+KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorReshapingOp, )
+
+KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorShufflingOp, const)
+KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP(TensorShufflingOp, )
+#undef KERNELBROKERCONVERTPADDINGANDRESHAPEANDSHUFFLEOP
 
 } // namespace internal
 } // namespace TensorSycl
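All of these TensorSycl headers lean on the same idiom: a macro parameterised on a CV qualifier and expanded twice, once with const and once with nothing, so each partial specialization is written a single time. A self-contained sketch of the trick (Box/Unwrap are invented for illustration):

    #include <type_traits>

    template <typename T> struct Box {};
    template <typename T> struct Unwrap { typedef T type; };

    // One macro body generates both the const and the non-const partial
    // specializations, exactly like the KERNELBROKERCONVERT* macros above.
    #define UNWRAP_BOX(CVQual)\
    template <typename T>\
    struct Unwrap<CVQual Box<T> > { typedef typename Unwrap<T>::type type; };

    UNWRAP_BOX(const)
    UNWRAP_BOX()
    #undef UNWRAP_BOX

    static_assert(std::is_same<Unwrap<const Box<Box<int> > >::type, int>::value,
                  "both specializations strip one layer");
    int main() { return 0; }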
@@ -231,7 +231,7 @@ SYCLREDUCTIONEXPR()
 template<typename StartIndices, typename Sizes, typename OrigXprType, typename XprType, typename... Params>\
 struct ExprConstructor<CVQual TensorSlicingOp <StartIndices, Sizes, OrigXprType> , CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Params... >{\
   typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\
-  typedef CVQual TensorSlicingOp<StartIndices, Sizes, typename my_xpr_type::Type> Type ;\
+  typedef CVQual TensorSlicingOp<StartIndices, Sizes, typename my_xpr_type::Type> Type;\
   my_xpr_type xprExpr;\
   Type expr;\
   template <typename FuncDetector>\
@@ -244,6 +244,22 @@ SYCLSLICEOPEXPR()
 #undef SYCLSLICEOPEXPR
 
 
+#define SYCLSLICESTRIDEOPEXPR(CVQual)\
+template<typename StartIndices, typename StopIndices, typename Strides, typename OrigXprType, typename XprType, typename... Params>\
+struct ExprConstructor<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, OrigXprType>, CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Params... >{\
+  typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\
+  typedef CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, typename my_xpr_type::Type> Type;\
+  my_xpr_type xprExpr;\
+  Type expr;\
+  template <typename FuncDetector>\
+  ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+    : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.startIndices(), funcD.stopIndices(), funcD.strides()) {}\
+};
+
+SYCLSLICESTRIDEOPEXPR(const)
+SYCLSLICESTRIDEOPEXPR()
+#undef SYCLSLICESTRIDEOPEXPR
+
 #define SYCLRESHAPEANDSHUFFLEOPEXPRCONST(OPEXPR, CVQual)\
 template<typename Param, typename OrigXprType, typename XprType, typename... Params>\
 struct ExprConstructor<CVQual OPEXPR <Param, OrigXprType> , CVQual OPEXPR <Param, XprType>, Params... >{\
@@ -263,6 +279,23 @@ SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, const)
 SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, )
 #undef SYCLRESHAPEANDSHUFFLEOPEXPRCONST
 
+#define SYCLPADDINGOPEXPRCONST(OPEXPR, CVQual)\
+template<typename Param, typename OrigXprType, typename XprType, typename... Params>\
+struct ExprConstructor<CVQual OPEXPR <Param, OrigXprType> , CVQual OPEXPR <Param, XprType>, Params... >{\
+  typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\
+  typedef CVQual OPEXPR <Param, typename my_xpr_type::Type> Type;\
+  my_xpr_type xprExpr;\
+  Type expr;\
+  template <typename FuncDetector>\
+  ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+    : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.param(), funcD.scalar_param()) {}\
+};
+
+SYCLPADDINGOPEXPRCONST(TensorPaddingOp, const)
+SYCLPADDINGOPEXPRCONST(TensorPaddingOp, )
+#undef SYCLPADDINGOPEXPRCONST
+
 /// template deduction for \ref ExprConstructor struct
 template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params>
@@ -209,7 +209,21 @@ SYCLSLICEOPEXTACC(const)
 SYCLSLICEOPEXTACC()
 #undef SYCLSLICEOPEXTACC
 
-#define RESHAPEANDSHUFFOPEXTRACC(OPEXPR, CVQual)\
+#define SYCLSLICESTRIDEOPEXTACC(CVQual)\
+template<typename StartIndices, typename StopIndices, typename Strides, typename XprType, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Dev> >{\
+  static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Dev>& eval)\
+  -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\
+    return AccessorConstructor::getTuple(cgh, eval.impl());\
+  }\
+};
+
+SYCLSLICESTRIDEOPEXTACC(const)
+SYCLSLICESTRIDEOPEXTACC()
+#undef SYCLSLICESTRIDEOPEXTACC
+
+#define PADDINGRESHAPEANDSHUFFOPEXTRACC(OPEXPR, CVQual)\
 template<typename Param, typename XprType, typename Dev>\
 struct ExtractAccessor<TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev> > {\
 static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev>& eval)\
@@ -217,13 +231,17 @@ struct ExtractAccessor<TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev> > {\
   return AccessorConstructor::getTuple(cgh, eval.impl());\
 }\
 };
 
+// tensor padding
+PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorPaddingOp, const)
+PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorPaddingOp, )
 // tensor reshaping
-RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, const)
-RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, )
+PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, const)
+PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, )
 /// Tensor shuffling
-RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, const)
-RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, )
-#undef RESHAPEANDSHUFFOPEXTRACC
+PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, const)
+PADDINGRESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, )
+#undef PADDINGRESHAPEANDSHUFFOPEXTRACC
 
 /// template deduction for \ref ExtractAccessor
 template <typename Evaluator>
@@ -176,6 +176,24 @@ SYCLEXTRFUNCTSLICEOP(const)
 SYCLEXTRFUNCTSLICEOP()
 #undef SYCLEXTRFUNCTSLICEOP
 
+#define SYCLEXTRFUNCTSLICESTRIDEOP(CVQual)\
+template<typename StartIndices, typename StopIndices, typename Strides, typename XprType, typename Dev>\
+struct FunctorExtractor<TensorEvaluator<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Dev> >{\
+  FunctorExtractor<TensorEvaluator<XprType, Dev> > xprExpr;\
+  const StartIndices m_startIndices;\
+  const StopIndices m_stopIndices;\
+  const Strides m_strides;\
+  FunctorExtractor(const TensorEvaluator<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Dev>& expr)\
+    : xprExpr(expr.impl()), m_startIndices(expr.exprStartIndices()), m_stopIndices(expr.exprStopIndices()), m_strides(expr.strides()) {}\
+  EIGEN_STRONG_INLINE const StartIndices& startIndices() const { return m_startIndices; }\
+  EIGEN_STRONG_INLINE const StopIndices& stopIndices() const { return m_stopIndices; }\
+  EIGEN_STRONG_INLINE const Strides& strides() const { return m_strides; }\
+};
+
+SYCLEXTRFUNCTSLICESTRIDEOP(const)
+SYCLEXTRFUNCTSLICESTRIDEOP()
+#undef SYCLEXTRFUNCTSLICESTRIDEOP
+
 // Had to separate reshapeOP otherwise it will be mistaken by UnaryCategory
 #define SYCLRESHAPEANDSHUFFLEOPFUNCEXT(OPEXPR, FUNCCALL, CVQual)\
 template<typename Param, typename XprType, typename Dev>\
|
|||||||
|
|
||||||
SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), const)
|
SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), const)
|
||||||
SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), )
|
SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), )
|
||||||
#undef SYCLRESHAPEOPEXPR
|
#undef SYCLRESHAPEANDSHUFFLEOPFUNCEXT
|
||||||
|
|
||||||
|
// Had to separate reshapeOP otherwise it will be mistaken by UnaryCategory
|
||||||
|
#define PADDINGOPFUNCEXT(OPEXPR, FUNCCALL, SCALARFUNCCALL, CVQual)\
|
||||||
|
template<typename Param, typename XprType, typename Dev>\
|
||||||
|
struct FunctorExtractor<Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev> > {\
|
||||||
|
FunctorExtractor<Eigen::TensorEvaluator<XprType, Dev> > xprExpr;\
|
||||||
|
const Param m_param;\
|
||||||
|
typedef typename Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev>::Scalar Scalar;\
|
||||||
|
const Scalar m_scalar_param;\
|
||||||
|
EIGEN_STRONG_INLINE const Param& param() const { return m_param; }\
|
||||||
|
EIGEN_STRONG_INLINE const Scalar& scalar_param() const { return m_scalar_param; }\
|
||||||
|
FunctorExtractor(const Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev>& expr)\
|
||||||
|
: xprExpr(expr.impl()), m_param(expr.FUNCCALL), m_scalar_param(expr.SCALARFUNCCALL) {}\
|
||||||
|
};
|
||||||
|
|
||||||
|
PADDINGOPFUNCEXT(TensorPaddingOp, padding(), padding_value(), const)
|
||||||
|
PADDINGOPFUNCEXT(TensorPaddingOp, padding(), padding_value(), )
|
||||||
|
#undef PADDINGOPFUNCEXT
|
||||||
|
|
||||||
/// template deduction function for FunctorExtractor
|
/// template deduction function for FunctorExtractor
|
||||||
template <typename Evaluator>
|
template <typename Evaluator>
|
||||||
|
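The extractors above follow one pattern: copy out only the trivially copyable parameters of each expression node (start/stop/strides here, padding dims and padding value for PADDINGOPFUNCEXT) so a kernel can rebuild the node on the device, where the original host expression cannot be captured. A freestanding sketch of that idea, with invented names:

    #include <cassert>

    struct PadNode {        // stands in for a TensorEvaluator of TensorPaddingOp
      int before, after;    // expression parameters
      float value;          // padding value
    };

    // Device-shippable snapshot: only POD state, no pointers, mirroring
    // what PADDINGOPFUNCEXT captures via padding()/padding_value().
    struct PadNodeExtract {
      int before, after;
      float value;
      explicit PadNodeExtract(const PadNode& n)
        : before(n.before), after(n.after), value(n.value) {}
    };

    int main() {
      PadNode n{2, 1, 0.0f};
      PadNodeExtract e(n);  // this is what would be copied into a kernel
      assert(e.before == 2 && e.after == 1 && e.value == 0.0f);
      return 0;
    }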
@@ -124,17 +124,27 @@ SLICEOPLEAFCOUNT(const)
 SLICEOPLEAFCOUNT()
 #undef SLICEOPLEAFCOUNT
 
-#define RESHAPEANDSHUFFLELEAFCOUNT(OPEXPR, CVQual)\
+#define SLICESTRIDEOPLEAFCOUNT(CVQual)\
+template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>\
+struct LeafCount<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >:CategoryCount<XprType>{};
+
+SLICESTRIDEOPLEAFCOUNT(const)
+SLICESTRIDEOPLEAFCOUNT()
+#undef SLICESTRIDEOPLEAFCOUNT
+
+#define PADDINGRESHAPEANDSHUFFLELEAFCOUNT(OPEXPR, CVQual)\
 template<typename Param, typename XprType>\
 struct LeafCount<CVQual OPEXPR<Param, XprType> >:CategoryCount<XprType>{};
 
-RESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, const)
-RESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, )
+PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorPaddingOp, const)
+PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorPaddingOp, )
 
-RESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, const)
-RESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, )
-#undef RESHAPEANDSHUFFLELEAFCOUNT
+PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, const)
+PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, )
+
+PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, const)
+PADDINGRESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, )
+#undef PADDINGRESHAPEANDSHUFFLELEAFCOUNT
 
 } /// namespace TensorSycl
 } /// namespace internal
@@ -180,18 +180,32 @@ SLICEOPEXPR(const)
 SLICEOPEXPR()
 #undef SLICEOPEXPR
 
-#define RESHAPEANDSHUFFLEOPPLH(OPEXP , CVQual)\
+#define SYCLSLICESTRIDEOPPLH(CVQual)\
+template<typename StartIndices, typename StopIndices, typename Strides, typename XprType, size_t N>\
+struct PlaceHolderExpression<CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, N> {\
+  typedef CVQual TensorStridingSlicingOp<StartIndices, StopIndices, Strides, typename CalculateIndex<N, XprType>::ArgType> Type;\
+};
+
+SYCLSLICESTRIDEOPPLH(const)
+SYCLSLICESTRIDEOPPLH()
+#undef SYCLSLICESTRIDEOPPLH
+
+#define PADDINGRESHAPEANDSHUFFLEOPPLH(OPEXP, CVQual)\
 template<typename Param, typename XprType, size_t N>\
 struct PlaceHolderExpression<CVQual OPEXP<Param, XprType>, N > {\
   typedef CVQual OPEXP<Param, typename CalculateIndex<N, XprType>::ArgType> Type;\
 };
 
-RESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, const)
-RESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, )
+PADDINGRESHAPEANDSHUFFLEOPPLH(TensorPaddingOp, const)
+PADDINGRESHAPEANDSHUFFLEOPPLH(TensorPaddingOp, )
 
-RESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, const)
-RESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, )
-#undef RESHAPEANDSHUFFLEOPPLH
+PADDINGRESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, const)
+PADDINGRESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, )
+
+PADDINGRESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, const)
+PADDINGRESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, )
+#undef PADDINGRESHAPEANDSHUFFLEOPPLH
 
 /// template deduction for \ref PlaceHolderExpression struct
 template <typename Expr>
@@ -49,6 +49,11 @@ struct numeric_list {
   static constexpr std::size_t count = sizeof...(nn);
   const T values[count] = {nn...};
 };
+template<typename T>
+struct numeric_list<T>{
+  static constexpr std::size_t count = 0;
+  // An array of size zero is strictly forbidden in ISO C++.
+};
 
 #endif
 
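The new empty-pack specialization exists because the primary template would otherwise declare values[0], and zero-length arrays are ill-formed in ISO C++. A standalone sketch of the same fix:

    #include <cstddef>

    template <typename T, T... nn>
    struct list_sketch {
      static constexpr std::size_t count = sizeof...(nn);
      const T values[count] = {nn...};   // ill-formed if count were 0
    };

    // Empty-pack specialization: drops the array entirely, as in the hunk above.
    template <typename T>
    struct list_sketch<T> {
      static constexpr std::size_t count = 0;
    };

    int main() {
      list_sketch<int, 1, 2, 3> l;
      static_assert(list_sketch<int>::count == 0, "empty pack is usable");
      return l.values[0] - 1;            // 0
    }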
@@ -148,6 +148,7 @@ if(EIGEN_TEST_CXX11)
     ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11")
+    ei_add_test_sycl(cxx11_tensor_padding_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
   endif(EIGEN_TEST_SYCL)
   # It should be safe to always run these tests as there is some fallback code for
@@ -180,6 +180,53 @@ static void test_simple_slice(const Eigen::SyclDevice &sycl_device)
   sycl_device.deallocate(gpu_data3);
 }
 
+template<typename DataType, int DataLayout, typename IndexType>
+static void test_strided_slice_write_sycl(const Eigen::SyclDevice& sycl_device)
+{
+  typedef Tensor<DataType, 2, DataLayout, IndexType> Tensor2f;
+  typedef Eigen::DSizes<IndexType, 2> Index2;
+  IndexType sizeDim1 = 7L;
+  IndexType sizeDim2 = 11L;
+  array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
+  Tensor<DataType, 2, DataLayout, IndexType> tensor(tensorRange), tensor2(tensorRange);
+  IndexType sliceDim1 = 2;
+  IndexType sliceDim2 = 3;
+  array<IndexType, 2> sliceRange = {{sliceDim1, sliceDim2}};
+  Tensor2f slice(sliceRange);
+  Index2 strides(1L,1L);
+  Index2 indicesStart(3L,4L);
+  Index2 indicesStop(5L,7L);
+  Index2 lengths(2L,3L);
+
+  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
+  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
+  DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice.size()*sizeof(DataType)));
+  TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
+  TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, tensorRange);
+  TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu3(gpu_data3, sliceRange);
+
+  tensor.setRandom();
+  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
+  gpu2.device(sycl_device)=gpu1;
+
+  slice.setRandom();
+  sycl_device.memcpyHostToDevice(gpu_data3, slice.data(),(slice.size())*sizeof(DataType));
+
+  gpu1.slice(indicesStart,lengths).device(sycl_device)=gpu3;
+  gpu2.stridedSlice(indicesStart,indicesStop,strides).device(sycl_device)=gpu3;
+  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data1,(tensor.size())*sizeof(DataType));
+  sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));
+
+  for(int i=0;i<sizeDim1;i++) for(int j=0;j<sizeDim2;j++){
+    VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j));
+  }
+  sycl_device.deallocate(gpu_data1);
+  sycl_device.deallocate(gpu_data2);
+  sycl_device.deallocate(gpu_data3);
+}
+
 template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_device(dev_Selector s){
   QueueInterface queueInterface(s);
   auto sycl_device = Eigen::SyclDevice(&queueInterface);
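The test above leans on a simple identity: with unit strides, stridedSlice(start, stop, strides) covers the half-open box [start, stop), which is exactly what slice(start, lengths) covers when lengths == stop - start (here (2,3) == (5,7) - (3,4)). A host-only sketch of that equivalence:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> t(7, 11);
      t.setRandom();
      Eigen::DSizes<Eigen::DenseIndex, 2> start(3, 4), stop(5, 7), stride(1, 1), lengths(2, 3);
      Eigen::Tensor<float, 2> a = t.slice(start, lengths);
      Eigen::Tensor<float, 2> b = t.stridedSlice(start, stop, stride);
      for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 3; ++j)
          if (a(i, j) != b(i, j)) return 1;  // must match element-wise
      return 0;
    }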
@@ -189,6 +236,8 @@ template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_d
   test_simple_reshape<DataType, ColMajor>(sycl_device);
   test_reshape_as_lvalue<DataType, RowMajor>(sycl_device);
   test_reshape_as_lvalue<DataType, ColMajor>(sycl_device);
+  test_strided_slice_write_sycl<DataType, ColMajor, int64_t>(sycl_device);
+  test_strided_slice_write_sycl<DataType, RowMajor, int64_t>(sycl_device);
 }
 void test_cxx11_tensor_morphing_sycl()
 {
unsupported/test/cxx11_tensor_padding_sycl.cpp (new file, 161 lines)
@@ -0,0 +1,161 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski  Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+// Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_padding_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+template<typename DataType, int DataLayout, typename IndexType>
+static void test_simple_padding(const Eigen::SyclDevice& sycl_device)
+{
+  IndexType sizeDim1 = 2;
+  IndexType sizeDim2 = 3;
+  IndexType sizeDim3 = 5;
+  IndexType sizeDim4 = 7;
+  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
+
+  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
+  tensor.setRandom();
+
+  array<std::pair<IndexType, IndexType>, 4> paddings;
+  paddings[0] = std::make_pair(0, 0);
+  paddings[1] = std::make_pair(2, 1);
+  paddings[2] = std::make_pair(3, 4);
+  paddings[3] = std::make_pair(0, 0);
+
+  IndexType paddedSizeDim1 = 2;
+  IndexType paddedSizeDim2 = 6;
+  IndexType paddedSizeDim3 = 12;
+  IndexType paddedSizeDim4 = 7;
+  array<IndexType, 4> paddedtensorRange = {{paddedSizeDim1, paddedSizeDim2, paddedSizeDim3, paddedSizeDim4}};
+
+  Tensor<DataType, 4, DataLayout, IndexType> padded(paddedtensorRange);
+
+  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
+  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(padded.size()*sizeof(DataType)));
+  TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
+  TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu2(gpu_data2, paddedtensorRange);
+
+  VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
+  VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
+  VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
+  VERIFY_IS_EQUAL(padded.dimension(3), 7+0);
+  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
+  gpu2.device(sycl_device)=gpu1.pad(paddings);
+  sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2,(padded.size())*sizeof(DataType));
+  for (int i = 0; i < paddedSizeDim1; ++i) {
+    for (int j = 0; j < paddedSizeDim2; ++j) {
+      for (int k = 0; k < paddedSizeDim3; ++k) {
+        for (int l = 0; l < paddedSizeDim4; ++l) {
+          if (j >= 2 && j < 5 && k >= 3 && k < 8) {
+            VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l));
+          } else {
+            VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f);
+          }
+        }
+      }
+    }
+  }
+  sycl_device.deallocate(gpu_data1);
+  sycl_device.deallocate(gpu_data2);
+}
+
+template<typename DataType, int DataLayout, typename IndexType>
+static void test_padded_expr(const Eigen::SyclDevice& sycl_device)
+{
+  IndexType sizeDim1 = 2;
+  IndexType sizeDim2 = 3;
+  IndexType sizeDim3 = 5;
+  IndexType sizeDim4 = 7;
+  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
+
+  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
+  tensor.setRandom();
+
+  array<std::pair<IndexType, IndexType>, 4> paddings;
+  paddings[0] = std::make_pair(0, 0);
+  paddings[1] = std::make_pair(2, 1);
+  paddings[2] = std::make_pair(3, 4);
+  paddings[3] = std::make_pair(0, 0);
+
+  Eigen::DSizes<IndexType, 2> reshape_dims;
+  reshape_dims[0] = 12;
+  reshape_dims[1] = 84;
+
+  Tensor<DataType, 2, DataLayout, IndexType> result(reshape_dims);
+
+  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
+  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(result.size()*sizeof(DataType)));
+  TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
+  TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, reshape_dims);
+
+  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
+  gpu2.device(sycl_device)=gpu1.pad(paddings).reshape(reshape_dims);
+  sycl_device.memcpyDeviceToHost(result.data(), gpu_data2,(result.size())*sizeof(DataType));
+
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 6; ++j) {
+      for (int k = 0; k < 12; ++k) {
+        for (int l = 0; l < 7; ++l) {
+          const float result_value = DataLayout == ColMajor ?
+            result(i+2*j,k+12*l) : result(j+6*i,l+7*k);
+          if (j >= 2 && j < 5 && k >= 3 && k < 8) {
+            VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l));
+          } else {
+            VERIFY_IS_EQUAL(result_value, 0.0f);
+          }
+        }
+      }
+    }
+  }
+  sycl_device.deallocate(gpu_data1);
+  sycl_device.deallocate(gpu_data2);
+}
+
+template<typename DataType, typename dev_Selector> void sycl_padding_test_per_device(dev_Selector s){
+  QueueInterface queueInterface(s);
+  auto sycl_device = Eigen::SyclDevice(&queueInterface);
+  test_simple_padding<DataType, RowMajor, int>(sycl_device);
+  test_simple_padding<DataType, ColMajor, int>(sycl_device);
+  test_padded_expr<DataType, RowMajor, int>(sycl_device);
+  test_padded_expr<DataType, ColMajor, int>(sycl_device);
+  test_simple_padding<DataType, RowMajor, int64_t>(sycl_device);
+  test_simple_padding<DataType, ColMajor, int64_t>(sycl_device);
+  test_padded_expr<DataType, RowMajor, int64_t>(sycl_device);
+  test_padded_expr<DataType, ColMajor, int64_t>(sycl_device);
+}
+
+void test_cxx11_tensor_padding_sycl()
+{
+  for (const auto& device : Eigen::get_sycl_supported_devices()) {
+    CALL_SUBTEST(sycl_padding_test_per_device<float>(device));
+  }
+}
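A note on the ColMajor branch of test_padded_expr above: the padded tensor has dimensions (2, 6, 12, 7), so element (i, j, k, l) lives at flat index i + 2j + 12k + 144l, and in the (12, 84) reshape that is row i + 2*j, column k + 12*l. A standalone check of the arithmetic:

    #include <cassert>

    int main() {
      for (int l = 0; l < 7; ++l)
        for (int k = 0; k < 12; ++k)
          for (int j = 0; j < 6; ++j)
            for (int i = 0; i < 2; ++i) {
              int flat4 = i + 2*j + 12*k + 144*l;  // ColMajor index in (2,6,12,7)
              int r = i + 2*j, c = k + 12*l;       // coordinates used by the test
              assert(flat4 == r + 12*c);           // same element after reshape
            }
      return 0;
    }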