mirror of https://gitlab.com/libeigen/eigen.git
Adding TensorShuffling backend for sycl; adding TensorReshaping backend for sycl; cleaning up the sycl backend.
commit 577ce78085
parent 02080e2b67
@@ -31,7 +31,6 @@ auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){
 ++it;
 }
 }
-printf("Device size %ld\n", devices.size());
 return devices;
 }
 #define ConvertToActualTypeSycl(T, buf_acc) reinterpret_cast<typename cl::sycl::global_ptr<T>::pointer_t>((&(*buf_acc.get_pointer())))
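Note on the ConvertToActualTypeSycl macro kept as context above: QueueInterface stores every allocation as a type-erased cl::sycl::buffer<uint8_t,1>, so kernels have to cast the byte accessor back to the scalar type before use. A minimal sketch of that cast, assuming a ComputeCpp-style SYCL 1.2 API (as_typed is a hypothetical helper name, not Eigen code):

    #include <CL/sycl.hpp>

    // Recover a typed pointer from the byte-buffer accessor, exactly as the
    // ConvertToActualTypeSycl macro does.
    template <typename T, typename ByteAccessor>
    typename cl::sycl::global_ptr<T>::pointer_t as_typed(ByteAccessor buf_acc) {
      // get_pointer() yields a global_ptr<uint8_t>; the kernel needs a T*.
      return reinterpret_cast<typename cl::sycl::global_ptr<T>::pointer_t>(
          &(*buf_acc.get_pointer()));
    }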
@@ -93,11 +92,6 @@ struct QueueInterface {
 }
 }

-EIGEN_STRONG_INLINE void deallocate_all() const {
-std::lock_guard<std::mutex> lock(mutex_);
-buffer_map.clear();
-}
-
 EIGEN_STRONG_INLINE std::map<const uint8_t *, cl::sycl::buffer<uint8_t,1>>::iterator find_buffer(const void* ptr) const {
 std::lock_guard<std::mutex> lock(mutex_);
 auto it1 = buffer_map.find(static_cast<const uint8_t*>(ptr));
@@ -118,10 +112,11 @@ struct QueueInterface {
 // underlying stream device.
 EIGEN_STRONG_INLINE bool ok() const {
 if (!exception_caught_) {
-m_queue.throw_asynchronous();
+m_queue.wait_and_throw();
 }
 return !exception_caught_;
 }

 // destructor
 ~QueueInterface() { buffer_map.clear(); }
 };
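On the ok() change above: throw_asynchronous() only rethrows errors from commands that have already completed, while wait_and_throw() first blocks until everything enqueued has finished, so ok() now also observes failures from kernels that were still in flight. A minimal sketch of the difference, assuming a SYCL 1.2 style queue API (the noop kernel name and handler body are illustrative only):

    #include <CL/sycl.hpp>
    #include <exception>

    int main() {
      // Async handler: device-side errors arrive here, not at the call site.
      cl::sycl::queue q([](cl::sycl::exception_list l) {
        for (auto& e : l) std::rethrow_exception(e);
      });
      q.submit([](cl::sycl::handler& cgh) {
        cgh.single_task<class noop_kernel>([]() {});  // may still be running below
      });
      q.throw_asynchronous();  // old ok(): reports only failures already recorded
      q.wait_and_throw();      // new ok(): drains the queue first, so failures of
                               // in-flight kernels surface before ok() returns
      return 0;
    }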
@@ -186,7 +181,7 @@ struct SyclDevice {
 auto dst_acc =it2->second.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
 cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), TensorSycl::internal::MemCopyFunctor<T>(src_acc, dst_acc, rng, 0, offset));
 });
-sycl_queue().throw_asynchronous();
+synchronize();
 }

 /// The memcpyHostToDevice is used to copy the device only pointer to a host pointer. Using the device
@@ -217,7 +212,7 @@ struct SyclDevice {
 auto dst_acc =dest_buf.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
 cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), TensorSycl::internal::MemCopyFunctor<T>(src_acc, dst_acc, rng, 0, offset));
 });
-sycl_queue().throw_asynchronous();
+synchronize();
 }
 /// returning the sycl queue
 EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;}
@@ -235,13 +230,13 @@ struct SyclDevice {
 }
 });
 });
-sycl_queue().throw_asynchronous();
+synchronize();
 }
 /// No need for sycl it should act the same as CPU version
 EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; }
-/// There is no need to synchronise the buffer in sycl as it is automatically handled by sycl runtime scheduler.
+
 EIGEN_STRONG_INLINE void synchronize() const {
-sycl_queue().wait_and_throw();
+sycl_queue().wait_and_throw(); //pass
 }
 // This function checks if the runtime recorded an error for the
 // underlying stream device.
@@ -168,12 +168,12 @@ template <typename Idx> struct IndexPair {
 #ifdef EIGEN_HAS_SFINAE
 namespace internal {

-template<typename IndexType, Index... Is>
+template<typename IndexType, typename Index, Index... Is>
 EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx, numeric_list<Index, Is...>) {
 return { idx[Is]... };
 }
-template<typename IndexType>
+template<typename IndexType, typename Index>
 EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 array<Index, 0> customIndices2Array(IndexType&, numeric_list<Index>) {
 return array<Index, 0>();
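The extra typename Index parameter above lets the index type be deduced from the numeric_list tag argument instead of being looked up in the enclosing scope. A simplified, self-contained sketch of that deduction (numeric_list reduced to a bare tag type here):

    #include <array>
    #include <cstddef>

    // Simplified stand-in for Eigen's internal numeric_list tag type.
    template <typename T, T... Vs> struct numeric_list {};

    // Index is deduced from the tag, which is what the extra parameter buys.
    template <typename IndexType, typename Index, Index... Is>
    std::array<Index, sizeof...(Is)> customIndices2Array(IndexType& idx,
                                                         numeric_list<Index, Is...>) {
      return {{ idx[Is]... }};
    }

    // Usage sketch: with long idx[3] = {7, 8, 9};
    // auto a = customIndices2Array(idx, numeric_list<std::ptrdiff_t, 0, 1, 2>());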
@@ -81,7 +81,7 @@ static void run(BufferTOut& bufOut, BufferTIn& bufI, const Eigen::SyclDevice& de
 });
 };
 dev.sycl_queue().submit(f);
-dev.sycl_queue().throw_asynchronous();
+dev.synchronize();

 /* At this point, you could queue::wait_and_throw() to ensure that
 * errors are caught quickly. However, this would likely impact
@@ -173,7 +173,7 @@ struct FullReducer<Self, Op, const Eigen::SyclDevice, Vectorizable> {
 tmp_global_accessor.get_pointer()[0]+=InnerMostDimReducer<decltype(device_self_evaluator), Op, false>::reduce(device_self_evaluator, static_cast<typename DevExpr::Index>(red_factor*(rng)), static_cast<typename DevExpr::Index>(remaining), const_cast<Op&>(functor));
 });
 });
-dev.sycl_queue().throw_asynchronous();
+dev.synchronize();

 /// This is used to recursively reduce the tmp value to an element of 1;
 syclGenericBufferReducer<CoeffReturnType,HostExpr>::run(out_buffer, temp_global_buffer,dev, GRange, outTileSize);
@@ -237,7 +237,7 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> {
 // }
 // });
 });
-dev.sycl_queue().throw_asynchronous();
+dev.synchronize();
 return false;
 }
 };
@@ -117,7 +117,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 };

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-: m_impl(op.expression(), device)
+: m_impl(op.expression(), device), m_shuffle(op.shufflePermutation())
 {
 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
 const Shuffle& shuffle = op.shufflePermutation();
@@ -187,6 +187,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>

 EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }

+// required by sycl
+EIGEN_STRONG_INLINE const Shuffle& shufflePermutation() const {return m_shuffle;}
+// required by sycl
+EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const {return m_impl;}
+
 protected:
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const {
 Index inputIndex = 0;
@@ -206,11 +211,12 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 return inputIndex + index * m_inputStrides[NumDims - 1];
 }
 }

 Dimensions m_dimensions;
 array<Index, NumDims> m_outputStrides;
 array<Index, NumDims> m_inputStrides;
 TensorEvaluator<ArgType, Device> m_impl;
+/// required by sycl
+Shuffle m_shuffle;
 };

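The two hunks above store the permutation in the evaluator (m_shuffle) and publish shufflePermutation() and impl() so the SYCL functor extractor can rebuild the shuffle on the device from what the host evaluator exposes. A hedged sketch of the consumer side (ShuffleFunctorSketch is illustrative, not Eigen code):

    // A functor-extractor style struct can now copy the permutation straight
    // off the evaluator via the accessor added above.
    template <typename Shuffle>
    struct ShuffleFunctorSketch {
      const Shuffle m_param;  // shipped to the device by value
      template <typename Evaluator>
      explicit ShuffleFunctorSketch(const Evaluator& eval)
          : m_param(eval.shufflePermutation()) {}
    };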
@@ -124,6 +124,20 @@ KERNELBROKERCONVERTSLICEOP(const)
 KERNELBROKERCONVERTSLICEOP()
 #undef KERNELBROKERCONVERTSLICEOP

+
+#define KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(OPEXPR, CVQual)\
+template<typename Param, typename XprType>\
+struct ConvertToDeviceExpression<CVQual OPEXPR <Param, XprType> >{\
+typedef CVQual OPEXPR<Param, typename ConvertToDeviceExpression<XprType>::Type> Type;\
+};
+
+KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, const)
+KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, )
+
+KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, const)
+KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorShufflingOp, )
+#undef KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP
+
 } // namespace internal
 } // namespace TensorSycl
 } // namespace Eigen
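Roughly what one invocation of the new macro above generates, with whitespace added; Param stands for the NewDimensions or Shuffle parameter of the op:

    // Expansion of KERNELBROKERCONVERTRESHAPEANDSHUFFLEOP(TensorReshapingOp, const):
    template <typename Param, typename XprType>
    struct ConvertToDeviceExpression<const TensorReshapingOp<Param, XprType> > {
      // Recurse on the child expression; the reshape parameter is kept as-is.
      typedef const TensorReshapingOp<
          Param, typename ConvertToDeviceExpression<XprType>::Type> Type;
    };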
@@ -31,7 +31,6 @@ template <typename PtrType, size_t N, typename... Params>
 struct EvalToLHSConstructor {
 PtrType expr;
 EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t) : expr(ConvertToActualTypeSycl(typename Eigen::internal::remove_all<PtrType>::type, utility::tuple::get<N>(t))) {}
-//EvalToLHSConstructor(const utility::tuple::Tuple<Params...> &t): expr((&(*(utility::tuple::get<N>(t).get_pointer())))) {}
 };

 /// \struct ExprConstructor is used to reconstruct the expression on the device and
@@ -57,8 +56,6 @@ CVQual PlaceHolder<CVQual TensorMap<T, Options_, MakePointer_>, N>, Params...>{\
 : expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get<N>(t)), fd.dimensions())){}\
 };

-//: expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}
-

 TENSORMAP(const)
 TENSORMAP()
@@ -198,7 +195,6 @@ CVQual PlaceHolder<CVQual TensorForcedEvalOp<DevExpr>, N>, Params...> {\
 ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
 : expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get<N>(t)), fd.dimensions())) {}\
 };
-//: expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}

 FORCEDEVAL(const)
 FORCEDEVAL()
@@ -224,7 +220,6 @@ CVQual PlaceHolder<CVQual TensorReductionOp<OP, Dim, DevExpr>, N>, Params...> {\
 ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
 :expr(Type(ConvertToActualTypeSycl(typename Type::Scalar, utility::tuple::get<N>(t)), fd.dimensions())) {}\
 };
-//: expr(Type((&(*(utility::tuple::get<N>(t).get_pointer()))), fd.dimensions())) {}

 SYCLREDUCTIONEXPR(const)
 SYCLREDUCTIONEXPR()
@@ -249,6 +244,26 @@ SYCLSLICEOPEXPR()
 #undef SYCLSLICEOPEXPR

+
+#define SYCLRESHAPEANDSHUFFLEOPEXPRCONST(OPEXPR, CVQual)\
+template<typename Param, typename OrigXprType, typename XprType, typename... Params>\
+struct ExprConstructor<CVQual OPEXPR <Param, OrigXprType> , CVQual OPEXPR <Param, XprType>, Params... >{\
+typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\
+typedef CVQual OPEXPR <Param, typename my_xpr_type::Type> Type ;\
+my_xpr_type xprExpr;\
+Type expr;\
+template <typename FuncDetector>\
+ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
+: xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.param()) {}\
+};
+
+SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorReshapingOp, const)
+SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorReshapingOp, )
+
+SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, const)
+SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, )
+#undef SYCLRESHAPEANDSHUFFLEOPEXPRCONST
+

 /// template deduction for \ref ExprConstructor struct
 template <typename OrigExpr, typename IndexExpr, typename FuncD, typename... Params>
 auto createDeviceExpression(FuncD &funcD, const utility::tuple::Tuple<Params...> &t)
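For reference, one expansion of SYCLRESHAPEANDSHUFFLEOPEXPRCONST from the hunk above, reformatted: the child expression is reconstructed on the device first, then the node is rebuilt around it using the parameter recovered from the functor detector (funcD.param(), supplied by the FunctorExtractor change later in this commit):

    // Expansion of SYCLRESHAPEANDSHUFFLEOPEXPRCONST(TensorShufflingOp, const):
    template <typename Param, typename OrigXprType, typename XprType, typename... Params>
    struct ExprConstructor<const TensorShufflingOp<Param, OrigXprType>,
                           const TensorShufflingOp<Param, XprType>, Params...> {
      typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;
      typedef const TensorShufflingOp<Param, typename my_xpr_type::Type> Type;
      my_xpr_type xprExpr;  // child expression, rebuilt first
      Type expr;            // this node, wrapping the rebuilt child
      template <typename FuncDetector>
      ExprConstructor(FuncDetector& funcD, const utility::tuple::Tuple<Params...>& t)
          : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.param()) {}
    };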
@@ -43,172 +43,193 @@ template <typename Evaluator>
 struct ExtractAccessor;

 struct AccessorConstructor{
-template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, Arg eval)
+template<typename Arg> static inline auto getTuple(cl::sycl::handler& cgh, const Arg& eval)
 -> decltype(ExtractAccessor<Arg>::getTuple(cgh, eval)) {
 return ExtractAccessor<Arg>::getTuple(cgh, eval);
 }

-template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1, Arg2 eval2)
+template<typename Arg1, typename Arg2> static inline auto getTuple(cl::sycl::handler& cgh, const Arg1& eval1, const Arg2& eval2)
 -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2))) {
 return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1), ExtractAccessor<Arg2>::getTuple(cgh, eval2));
 }
-template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, Arg1 eval1 , Arg2 eval2 , Arg3 eval3)
+template<typename Arg1, typename Arg2, typename Arg3> static inline auto getTuple(cl::sycl::handler& cgh, const Arg1& eval1 , const Arg2& eval2 , const Arg3& eval3)
 -> decltype(utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)))) {
 return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)));
 }
-template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval)
+template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, const Arg& eval)
 -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM>(cgh,eval.data()))){
 return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM>(cgh,eval.data()));
 }
 };

 /// specialisation of the \ref ExtractAccessor struct when the node type is
-/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp and const TensorBroadcastingOp
-template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> eval)
--> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){
-return AccessorConstructor::getTuple(cgh, eval.impl());
-}
+/// TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp
+#define SYCLUNARYCATEGORYEXTACC(CVQual)\
+template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual UnaryCategory<OP, RHSExpr>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual UnaryCategory<OP, RHSExpr>, Dev>& eval)\
+-> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\
+return AccessorConstructor::getTuple(cgh, eval.impl());\
+}\
 };

-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseNullaryOp, TensorCwiseUnaryOp and TensorBroadcastingOp
-template <template<class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {};
+SYCLUNARYCATEGORYEXTACC(const)
+SYCLUNARYCATEGORYEXTACC()
+#undef SYCLUNARYCATEGORYEXTACC

-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorCwiseBinaryOp
-template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> eval)
--> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){
-return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());
-}
-};
 /// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseBinaryOp
-template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{};
+#define SYCLBINARYCATEGORYEXTACC(CVQual)\
+template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& eval)\
+-> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){\
+return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());\
+}\
+};
+
+SYCLBINARYCATEGORYEXTACC(const)
+SYCLBINARYCATEGORYEXTACC()
+#undef SYCLBINARYCATEGORYEXTACC

 /// specialisation of the \ref ExtractAccessor struct when the node type is
 /// const TensorCwiseTernaryOp
-template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> eval)
--> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){
-return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl());
-}
+#define SYCLTERNARYCATEGORYEXTACC(CVQual)\
+template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& eval)\
+-> decltype(AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl())){\
+return AccessorConstructor::getTuple(cgh, eval.arg1Impl(), eval.arg2Impl(), eval.arg3Impl());\
+}\
 };

-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorCwiseTernaryOp
-template <template<class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{};
+SYCLTERNARYCATEGORYEXTACC(const)
+SYCLTERNARYCATEGORYEXTACC()
+#undef SYCLTERNARYCATEGORYEXTACC

-/// specialisation of the \ref ExtractAccessor struct when the node type is
-/// const TensorCwiseSelectOp. This is a special case where there is no OP
-template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> eval)
--> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){
-return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl());
-}
-};

 /// specialisation of the \ref ExtractAccessor struct when the node type is
 /// TensorCwiseSelectOp. This is a special case where there is no OP
-template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >{};
+#define SYCLSELECTOPEXTACC(CVQual)\
+template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& eval)\
+-> decltype(AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl())){\
+return AccessorConstructor::getTuple(cgh, eval.cond_impl(), eval.then_impl(), eval.else_impl());\
+}\
+};

-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorAssignOp
-template <typename LHSExpr, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> eval)
--> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){
-return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());
-}
-};
+SYCLSELECTOPEXTACC(const)
+SYCLSELECTOPEXTACC()
+#undef SYCLSELECTOPEXTACC

 /// specialisation of the \ref ExtractAccessor struct when the node type is TensorAssignOp
-template <typename LHSExpr, typename RHSExpr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{};
+#define SYCLTENSORASSIGNOPEXTACC(CVQual)\
+template <typename LHSExpr, typename RHSExpr, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorAssignOp<LHSExpr, RHSExpr>, Dev>& eval)\
+-> decltype(AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl())){\
+return AccessorConstructor::getTuple(cgh, eval.left_impl(), eval.right_impl());\
+}\
+};
+
+SYCLTENSORASSIGNOPEXTACC(const)
+SYCLTENSORASSIGNOPEXTACC()
+#undef SYCLTENSORASSIGNOPEXTACC

 /// specialisation of the \ref ExtractAccessor struct when the node type is const TensorMap
 #define TENSORMAPEXPR(CVQual, ACCType)\
 template <typename PlainObjectType, int Options_, typename Dev>\
 struct ExtractAccessor<TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> > {\
-static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev> eval)\
+static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorMap<PlainObjectType, Options_>, Dev>& eval)\
 -> decltype(AccessorConstructor::template getAccessor<ACCType>(cgh, eval)){\
 return AccessorConstructor::template getAccessor<ACCType>(cgh, eval);\
 }\
 };

 TENSORMAPEXPR(const, cl::sycl::access::mode::read)
 TENSORMAPEXPR(, cl::sycl::access::mode::read_write)
 #undef TENSORMAPEXPR

-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorForcedEvalOp
-template <typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> eval)
--> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){
-return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);
-}
-};

 /// specialisation of the \ref ExtractAccessor struct when the node type is TensorForcedEvalOp
-template <typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorForcedEvalOp<Expr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorForcedEvalOp<Expr>, Dev> >{};
+#define SYCLFORCEDEVALEXTACC(CVQual)\
+template <typename Expr, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorForcedEvalOp<Expr>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorForcedEvalOp<Expr>, Dev>& eval)\
+-> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){\
+return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);\
+}\
+};

-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorEvalToOp
-template <typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<const TensorEvalToOp<Expr>, Dev> eval)
--> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){
-return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()));
-}
-};
+SYCLFORCEDEVALEXTACC(const)
+SYCLFORCEDEVALEXTACC()
+#undef SYCLFORCEDEVALEXTACC
+

 /// specialisation of the \ref ExtractAccessor struct when the node type is TensorEvalToOp
-template <typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorEvalToOp<Expr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorEvalToOp<Expr>, Dev> >{};
+#define SYCLEVALTOEXTACC(CVQual)\
+template <typename Expr, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorEvalToOp<Expr>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh,const TensorEvaluator<CVQual TensorEvalToOp<Expr>, Dev>& eval)\
+-> decltype(utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()))){\
+return utility::tuple::append(AccessorConstructor::template getAccessor<cl::sycl::access::mode::write>(cgh, eval), AccessorConstructor::getTuple(cgh, eval.impl()));\
+}\
+};

-/// specialisation of the \ref ExtractAccessor struct when the node type is const TensorReductionOp
-template <typename OP, typename Dim, typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> eval)
--> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){
-return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);
-}
-};
+SYCLEVALTOEXTACC(const)
+SYCLEVALTOEXTACC()
+#undef SYCLEVALTOEXTACC

-/// specialisation of the \ref ExtractAccessor struct when the node type is TensorReductionOp
-template <typename OP, typename Dim, typename Expr, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorReductionOp<OP, Dim, Expr>, Dev> >
-: ExtractAccessor<TensorEvaluator<const TensorReductionOp<OP, Dim, Expr>, Dev> >{};
+/// specialisation of the \ref ExtractAccessor struct when the node type is TensorReductionOp
+#define SYCLREDUCTIONEXTACC(CVQual)\
+template <typename OP, typename Dim, typename Expr, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorReductionOp<OP, Dim, Expr>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorReductionOp<OP, Dim, Expr>, Dev>& eval)\
+-> decltype(AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval)){\
+return AccessorConstructor::template getAccessor<cl::sycl::access::mode::read>(cgh, eval);\
+}\
+};
+
+SYCLREDUCTIONEXTACC(const)
+SYCLREDUCTIONEXTACC()
+#undef SYCLREDUCTIONEXTACC

 /// specialisation of the \ref ExtractAccessor struct when the node type is
 /// const TensorSlicingOp. This is a special case where there is no OP
-template <typename StartIndices, typename Sizes, typename XprType, typename Dev>
-struct ExtractAccessor<TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > {
-static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> eval)
--> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){
-return AccessorConstructor::getTuple(cgh, eval.impl());
-}
+#define SYCLSLICEOPEXTACC(CVQual)\
+template <typename StartIndices, typename Sizes, typename XprType, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Dev>& eval)\
+-> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\
+return AccessorConstructor::getTuple(cgh, eval.impl());\
+}\
 };

-template <typename StartIndices, typename Sizes, typename XprType, typename Dev>
-struct ExtractAccessor<TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> >
-:ExtractAccessor<TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> >{};
+SYCLSLICEOPEXTACC(const)
+SYCLSLICEOPEXTACC()
+#undef SYCLSLICEOPEXTACC

+#define RESHAPEANDSHUFFOPEXTRACC(OPEXPR, CVQual)\
+template<typename Param, typename XprType, typename Dev>\
+struct ExtractAccessor<TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev> > {\
+static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual OPEXPR<Param, XprType>, Dev>& eval)\
+-> decltype(AccessorConstructor::getTuple(cgh, eval.impl())){\
+return AccessorConstructor::getTuple(cgh, eval.impl());\
+}\
+};
+// tensor reshaping
+RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, const)
+RESHAPEANDSHUFFOPEXTRACC(TensorReshapingOp, )
+/// Tensor shuffling
+RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, const)
+RESHAPEANDSHUFFOPEXTRACC(TensorShufflingOp, )
+#undef RESHAPEANDSHUFFOPEXTRACC
+
 /// template deduction for \ref ExtractAccessor
 template <typename Evaluator>
-auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr)
+auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& eval)
--> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, expr)) {
+-> decltype(ExtractAccessor<Evaluator>::getTuple(cgh, eval)) {
-return ExtractAccessor<Evaluator>::getTuple(cgh, expr);
+return ExtractAccessor<Evaluator>::getTuple(cgh, eval);
 }

 } /// namespace TensorSycl
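The cleanup in the hunk above replaces each hand-written pair of specialisations (a const one plus a non-const forwarder inheriting from it) with a single CVQual macro invoked twice, once with const and once with an empty argument. Roughly what SYCLUNARYCATEGORYEXTACC() with the empty argument generates, reformatted; the const invocation produces the same body with const spliced in front of UnaryCategory:

    template <template <class, class> class UnaryCategory,
              typename OP, typename RHSExpr, typename Dev>
    struct ExtractAccessor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> > {
      static inline auto getTuple(
          cl::sycl::handler& cgh,
          const TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev>& eval)
          -> decltype(AccessorConstructor::getTuple(cgh, eval.impl())) {
        return AccessorConstructor::getTuple(cgh, eval.impl());  // recurse on child
      }
    };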
|
@ -36,152 +36,164 @@ namespace internal {
|
|||||||
template <typename Evaluator> struct FunctorExtractor{
|
template <typename Evaluator> struct FunctorExtractor{
|
||||||
typedef typename Evaluator::Dimensions Dimensions;
|
typedef typename Evaluator::Dimensions Dimensions;
|
||||||
const Dimensions m_dimensions;
|
const Dimensions m_dimensions;
|
||||||
const Dimensions& dimensions() const { return m_dimensions; }
|
EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||||
FunctorExtractor(const Evaluator& expr)
|
FunctorExtractor(const Evaluator& expr)
|
||||||
: m_dimensions(expr.dimensions()) {}
|
: m_dimensions(expr.dimensions()) {}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
||||||
/// const TensorCwiseNullaryOp, const TensorCwiseUnaryOp, and const TensorBroadcastingOp
|
/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp
|
||||||
template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
|
#define SYCLEXTRFUNCUNARY(CVQual)\
|
||||||
struct FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> > {
|
template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>\
|
||||||
FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
|
struct FunctorExtractor<TensorEvaluator<CVQual UnaryCategory<OP, RHSExpr>, Dev> > {\
|
||||||
OP func;
|
FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\
|
||||||
FunctorExtractor(const TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev>& expr)
|
OP func;\
|
||||||
: rhsExpr(expr.impl()), func(expr.functor()) {}
|
FunctorExtractor(const TensorEvaluator<CVQual UnaryCategory<OP, RHSExpr>, Dev>& expr)\
|
||||||
};
|
: rhsExpr(expr.impl()), func(expr.functor()) {}\
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
|
||||||
/// TensorCwiseNullaryOp, TensorCwiseUnaryOp, and TensorBroadcastingOp
|
|
||||||
template <template <class, class> class UnaryCategory, typename OP, typename RHSExpr, typename Dev>
|
|
||||||
struct FunctorExtractor<TensorEvaluator<UnaryCategory<OP, RHSExpr>, Dev> >
|
|
||||||
: FunctorExtractor<TensorEvaluator<const UnaryCategory<OP, RHSExpr>, Dev> >{};
|
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
|
||||||
/// const TensorCwiseBinaryOp
|
|
||||||
template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
|
|
||||||
struct FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {
|
|
||||||
FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;
|
|
||||||
FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
|
|
||||||
OP func;
|
|
||||||
FunctorExtractor(const TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr)
|
|
||||||
: lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
SYCLEXTRFUNCUNARY(const)
|
||||||
/// const TensorCwiseBinaryOp
|
SYCLEXTRFUNCUNARY()
|
||||||
template <template <class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>
|
#undef SYCLEXTRFUNCUNARY
|
||||||
struct FunctorExtractor<TensorEvaluator<BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >
|
|
||||||
: FunctorExtractor<TensorEvaluator<const BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> >{};
|
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
||||||
/// const TensorCwiseTernaryOp
|
/// TensorCwiseBinaryOp
|
||||||
template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev>
|
#define SYCLEXTRFUNCBIINARY(CVQual)\
|
||||||
struct FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {
|
template <template<class, class, class> class BinaryCategory, typename OP, typename LHSExpr, typename RHSExpr, typename Dev>\
|
||||||
FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr;
|
struct FunctorExtractor<TensorEvaluator<CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Dev> > {\
|
||||||
FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr;
|
FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;\
|
||||||
FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr;
|
FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\
|
||||||
OP func;
|
OP func;\
|
||||||
FunctorExtractor(const TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr)
|
FunctorExtractor(const TensorEvaluator<CVQual BinaryCategory<OP, LHSExpr, RHSExpr>, Dev>& expr)\
|
||||||
: arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {}
|
: lhsExpr(expr.left_impl()),rhsExpr(expr.right_impl()),func(expr.functor()) {}\
|
||||||
};
|
};
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
SYCLEXTRFUNCBIINARY(const)
|
||||||
/// TensorCwiseTernaryOp
|
SYCLEXTRFUNCBIINARY()
|
||||||
template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr, typename Dev>
|
#undef SYCLEXTRFUNCBIINARY
|
||||||
struct FunctorExtractor<TensorEvaluator< TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >
|
|
||||||
:FunctorExtractor<TensorEvaluator<const TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> >{};
|
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
/// specialisation of the \ref FunctorExtractor struct when the node type is TensorCwiseTernaryOp
|
||||||
/// const TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated.
|
#define SYCLEXTRFUNCTERNARY(CVQual)\
|
||||||
template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
|
template <template <class, class, class, class> class TernaryCategory, typename OP, typename Arg1Expr, typename Arg2Expr, typename Arg3Expr,typename Dev>\
|
||||||
struct FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {
|
struct FunctorExtractor<TensorEvaluator<CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev> > {\
|
||||||
FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr;
|
FunctorExtractor<TensorEvaluator<Arg1Expr, Dev> > arg1Expr;\
|
||||||
FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr;
|
FunctorExtractor<TensorEvaluator<Arg2Expr, Dev> > arg2Expr;\
|
||||||
FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr;
|
FunctorExtractor<TensorEvaluator<Arg3Expr, Dev> > arg3Expr;\
|
||||||
FunctorExtractor(const TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr)
|
OP func;\
|
||||||
: ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {}
|
FunctorExtractor(const TensorEvaluator<CVQual TernaryCategory<OP, Arg1Expr, Arg2Expr, Arg3Expr>, Dev>& expr)\
|
||||||
|
: arg1Expr(expr.arg1Impl()), arg2Expr(expr.arg2Impl()), arg3Expr(expr.arg3Impl()), func(expr.functor()) {}\
|
||||||
};
|
};
|
||||||
|
|
||||||
|
SYCLEXTRFUNCTERNARY(const)
|
||||||
|
SYCLEXTRFUNCTERNARY()
|
||||||
|
#undef SYCLEXTRFUNCTERNARY
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
||||||
/// TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated
|
/// TensorCwiseSelectOp. This is an specialisation without OP so it has to be separated.
|
||||||
template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>
|
#define SYCLEXTRFUNCSELECTOP(CVQual)\
|
||||||
struct FunctorExtractor<TensorEvaluator<TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> >
|
template <typename IfExpr, typename ThenExpr, typename ElseExpr, typename Dev>\
|
||||||
:FunctorExtractor< TensorEvaluator<const TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {};
|
struct FunctorExtractor< TensorEvaluator<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev> > {\
|
||||||
|
FunctorExtractor<TensorEvaluator<IfExpr, Dev> > ifExpr;\
|
||||||
|
FunctorExtractor<TensorEvaluator<ThenExpr, Dev> > thenExpr;\
|
||||||
|
FunctorExtractor<TensorEvaluator<ElseExpr, Dev> > elseExpr;\
|
||||||
|
FunctorExtractor(const TensorEvaluator<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr>, Dev>& expr)\
|
||||||
|
: ifExpr(expr.cond_impl()), thenExpr(expr.then_impl()), elseExpr(expr.else_impl()) {}\
|
||||||
|
};
|
||||||
|
|
||||||
|
SYCLEXTRFUNCSELECTOP(const)
|
||||||
|
SYCLEXTRFUNCSELECTOP()
|
||||||
|
#undef SYCLEXTRFUNCSELECTOP
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
||||||
/// const TensorAssignOp. This is an specialisation without OP so it has to be separated.
|
/// const TensorAssignOp. This is an specialisation without OP so it has to be separated.
|
||||||
template <typename LHSExpr, typename RHSExpr, typename Dev>
|
#define SYCLEXTRFUNCASSIGNOP(CVQual)\
|
||||||
struct FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {
|
template <typename LHSExpr, typename RHSExpr, typename Dev>\
|
||||||
FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;
|
struct FunctorExtractor<TensorEvaluator<CVQual TensorAssignOp<LHSExpr, RHSExpr>, Dev> > {\
|
||||||
FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
|
FunctorExtractor<TensorEvaluator<LHSExpr, Dev> > lhsExpr;\
|
||||||
FunctorExtractor(const TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr)
|
FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\
|
||||||
: lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {}
|
FunctorExtractor(const TensorEvaluator<CVQual TensorAssignOp<LHSExpr, RHSExpr>, Dev>& expr)\
|
||||||
|
: lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {}\
|
||||||
|
};
|
||||||
|
SYCLEXTRFUNCASSIGNOP(const)
|
||||||
|
SYCLEXTRFUNCASSIGNOP()
|
||||||
|
#undef SYCLEXTRFUNCASSIGNOP
|
||||||
|
|
||||||
|
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
||||||
|
/// TensorEvalToOp, This is an specialisation without OP so it has to be separated.
|
||||||
|
#define SYCLEXTRFUNCEVALTOOP(CVQual)\
|
||||||
|
template <typename RHSExpr, typename Dev>\
|
||||||
|
struct FunctorExtractor<TensorEvaluator<CVQual TensorEvalToOp<RHSExpr>, Dev> > {\
|
||||||
|
FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;\
|
||||||
|
FunctorExtractor(const TensorEvaluator<CVQual TensorEvalToOp<RHSExpr>, Dev>& expr)\
|
||||||
|
: rhsExpr(expr.impl()) {}\
|
||||||
};
|
};
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
SYCLEXTRFUNCEVALTOOP(const)
|
||||||
/// TensorAssignOp. This is an specialisation without OP so it has to be separated.
|
SYCLEXTRFUNCEVALTOOP()
|
||||||
template <typename LHSExpr, typename RHSExpr, typename Dev>
|
#undef SYCLEXTRFUNCEVALTOOP
|
||||||
struct FunctorExtractor<TensorEvaluator<TensorAssignOp<LHSExpr, RHSExpr>, Dev> >
|
|
||||||
:FunctorExtractor<TensorEvaluator<const TensorAssignOp<LHSExpr, RHSExpr>, Dev> >{};
|
|
||||||
|
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
|
||||||
/// const TensorEvalToOp, This is an specialisation without OP so it has to be separated.
|
|
||||||
template <typename RHSExpr, typename Dev>
|
|
||||||
struct FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {
|
|
||||||
FunctorExtractor<TensorEvaluator<RHSExpr, Dev> > rhsExpr;
|
|
||||||
FunctorExtractor(const TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev>& expr)
|
|
||||||
: rhsExpr(expr.impl()) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
|
|
||||||
/// TensorEvalToOp. This is a specialisation without OP so it has to be separated.
|
|
||||||
template <typename RHSExpr, typename Dev>
|
|
||||||
struct FunctorExtractor<TensorEvaluator<TensorEvalToOp<RHSExpr>, Dev> >
|
|
||||||
: FunctorExtractor<TensorEvaluator<const TensorEvalToOp<RHSExpr>, Dev> > {};
|
|
||||||
|
|
||||||
template<typename Dim, size_t NumOutputDim> struct DimConstr {
|
template<typename Dim, size_t NumOutputDim> struct DimConstr {
|
||||||
template<typename InDim>
|
template<typename InDim>
|
||||||
static inline Dim getDim(InDim dims ) {return dims;}
|
static EIGEN_STRONG_INLINE Dim getDim(InDim dims ) {return dims;}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Dim> struct DimConstr<Dim, 0> {
|
template<typename Dim> struct DimConstr<Dim, 0> {
|
||||||
template<typename InDim>
|
template<typename InDim>
|
||||||
static inline Dim getDim(InDim dims ) {return Dim(static_cast<Dim>(dims.TotalSize()));}
|
static EIGEN_STRONG_INLINE Dim getDim(InDim dims ) {return Dim(static_cast<Dim>(dims.TotalSize()));}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
|
#define SYCLEXTRFUNCREDUCTIONOP(CVQual)\
|
||||||
struct FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{
|
template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>\
|
||||||
typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Evaluator;
|
struct FunctorExtractor<TensorEvaluator<CVQual TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{\
|
||||||
typedef typename Eigen::internal::conditional<Evaluator::NumOutputDims==0, DSizes<typename Evaluator::Index, 1>, typename Evaluator::Dimensions >::type Dimensions;
|
typedef TensorEvaluator<CVQual TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Evaluator;\
|
||||||
const Dimensions m_dimensions;
|
typedef typename Eigen::internal::conditional<Evaluator::NumOutputDims==0, DSizes<typename Evaluator::Index, 1>, typename Evaluator::Dimensions >::type Dimensions;\
|
||||||
const Dimensions& dimensions() const { return m_dimensions; }
|
const Dimensions m_dimensions;\
|
||||||
FunctorExtractor(const TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>& expr)
|
EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }\
|
||||||
: m_dimensions(DimConstr<Dimensions, Evaluator::NumOutputDims>::getDim(expr.dimensions())) {}
|
FunctorExtractor(const TensorEvaluator<CVQual TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>& expr)\
|
||||||
|
: m_dimensions(DimConstr<Dimensions, Evaluator::NumOutputDims>::getDim(expr.dimensions())) {}\
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
|
SYCLEXTRFUNCREDUCTIONOP(const)
|
||||||
struct FunctorExtractor<TensorEvaluator<TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>
|
SYCLEXTRFUNCREDUCTIONOP()
|
||||||
: FunctorExtractor<TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>>{};
|
#undef SYCLEXTRFUNCREDUCTIONOP
|
||||||
|
|
||||||
/// specialisation of the \ref FunctorExtractor struct when the node type is
/// TensorSlicingOp. This is a specialisation without Op, so it has to be separated out.
#define SYCLEXTRFUNCTSLICEOP(CVQual)\
template <typename StartIndices, typename Sizes, typename XprType, typename Dev>\
struct FunctorExtractor<TensorEvaluator<CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Dev> > {\
  FunctorExtractor<TensorEvaluator<XprType, Dev> > xprExpr;\
  const StartIndices m_offsets;\
  const Sizes m_dimensions;\
  FunctorExtractor(const TensorEvaluator<CVQual TensorSlicingOp<StartIndices, Sizes, XprType>, Dev>& expr)\
  : xprExpr(expr.impl()), m_offsets(expr.startIndices()), m_dimensions(expr.dimensions()) {}\
  EIGEN_STRONG_INLINE const StartIndices& startIndices() const {return m_offsets;}\
  EIGEN_STRONG_INLINE const Sizes& dimensions() const {return m_dimensions;}\
};

SYCLEXTRFUNCTSLICEOP(const)
SYCLEXTRFUNCTSLICEOP()
#undef SYCLEXTRFUNCTSLICEOP
// ReshapeOp had to be separated out, otherwise it is mistaken for UnaryCategory.
#define SYCLRESHAPEANDSHUFFLEOPFUNCEXT(OPEXPR, FUNCCALL, CVQual)\
template<typename Param, typename XprType, typename Dev>\
struct FunctorExtractor<Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev> > {\
  FunctorExtractor<Eigen::TensorEvaluator<XprType, Dev> > xprExpr;\
  const Param m_param;\
  EIGEN_STRONG_INLINE const Param& param() const { return m_param; }\
  FunctorExtractor(const Eigen::TensorEvaluator<CVQual Eigen::OPEXPR<Param, XprType>, Dev>& expr)\
  : xprExpr(expr.impl()), m_param(expr.FUNCCALL) {}\
};

SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorReshapingOp, dimensions(), const)
SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorReshapingOp, dimensions(), )

SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), const)
SYCLRESHAPEANDSHUFFLEOPFUNCEXT(TensorShufflingOp, shufflePermutation(), )
#undef SYCLRESHAPEANDSHUFFLEOPFUNCEXT
/// template deduction function for FunctorExtractor
template <typename Evaluator>
auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> {
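A minimal, self-contained analogue (not Eigen's code) of what extractFunctors does: before a kernel launch, host-side evaluator state is snapshotted into a trivially-copyable struct that can be passed by value to the device. All names below (HostEvaluator, Snapshot, extractSnapshot) are illustrative.

#include <cstddef>
#include <type_traits>

struct HostEvaluator {          // owns host-only resources in real code
  std::size_t offset;
  std::size_t size;
};

struct Snapshot {               // device-copyable: plain data, no host pointers
  std::size_t offset;
  std::size_t size;
};

inline Snapshot extractSnapshot(const HostEvaluator& e) {
  return Snapshot{e.offset, e.size};
}

static_assert(std::is_trivially_copyable<Snapshot>::value,
              "kernel arguments must be copyable by value");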
@@ -44,77 +44,97 @@ struct CategoryCount<Arg,Args...>{
};

/// specialisation of the \ref LeafCount struct when the node type is TensorMap
#define SYCLTENSORMAPLEAFCOUNT(CVQual)\
template <typename PlainObjectType, int Options_, template <class> class MakePointer_>\
struct LeafCount<CVQual TensorMap<PlainObjectType, Options_, MakePointer_> > {\
  static const size_t Count =1;\
};

SYCLTENSORMAPLEAFCOUNT(const)
SYCLTENSORMAPLEAFCOUNT()
#undef SYCLTENSORMAPLEAFCOUNT
// TensorCwiseUnaryOp, TensorCwiseNullaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, and TensorBroadcastingOp
#define SYCLCATEGORYLEAFCOUNT(CVQual)\
template <template <class, class...> class CategoryExpr, typename OP, typename... RHSExpr>\
struct LeafCount<CVQual CategoryExpr<OP, RHSExpr...> >: CategoryCount<RHSExpr...> {};

SYCLCATEGORYLEAFCOUNT(const)
SYCLCATEGORYLEAFCOUNT()
#undef SYCLCATEGORYLEAFCOUNT
/// specialisation of the \ref LeafCount struct when the node type is TensorSelectOp. This is an exception.
#define SYCLSELECTOPLEAFCOUNT(CVQual)\
template <typename IfExpr, typename ThenExpr, typename ElseExpr>\
struct LeafCount<CVQual TensorSelectOp<IfExpr, ThenExpr, ElseExpr> > : CategoryCount<IfExpr, ThenExpr, ElseExpr> {};

SYCLSELECTOPLEAFCOUNT(const)
SYCLSELECTOPLEAFCOUNT()
#undef SYCLSELECTOPLEAFCOUNT
/// specialisation of the \ref LeafCount struct when the node type is TensorAssignOp
#define SYCLLEAFCOUNTASSIGNOP(CVQual)\
template <typename LHSExpr, typename RHSExpr>\
struct LeafCount<CVQual TensorAssignOp<LHSExpr, RHSExpr> >: CategoryCount<LHSExpr,RHSExpr> {};

SYCLLEAFCOUNTASSIGNOP(const)
SYCLLEAFCOUNTASSIGNOP()
#undef SYCLLEAFCOUNTASSIGNOP
/// specialisation of the \ref LeafCount struct when the node type is TensorForcedEvalOp
#define SYCLFORCEDEVALLEAFCOUNT(CVQual)\
template <typename Expr>\
struct LeafCount<CVQual TensorForcedEvalOp<Expr> > {\
  static const size_t Count =1;\
};

SYCLFORCEDEVALLEAFCOUNT(const)
SYCLFORCEDEVALLEAFCOUNT()
#undef SYCLFORCEDEVALLEAFCOUNT
/// specialisation of the \ref LeafCount struct when the node type is TensorEvalToOp
#define EVALTOLEAFCOUNT(CVQual)\
template <typename Expr>\
struct LeafCount<CVQual TensorEvalToOp<Expr> > {\
  static const size_t Count = 1 + CategoryCount<Expr>::Count;\
};

EVALTOLEAFCOUNT(const)
EVALTOLEAFCOUNT()
#undef EVALTOLEAFCOUNT

/// specialisation of the \ref LeafCount struct when the node type is TensorReductionOp
#define REDUCTIONLEAFCOUNT(CVQual)\
template <typename OP, typename Dim, typename Expr>\
struct LeafCount<CVQual TensorReductionOp<OP, Dim, Expr> > {\
  static const size_t Count =1;\
};

REDUCTIONLEAFCOUNT(const)
REDUCTIONLEAFCOUNT()
#undef REDUCTIONLEAFCOUNT

/// specialisation of the \ref LeafCount struct when the node type is TensorSlicingOp
#define SLICEOPLEAFCOUNT(CVQual)\
template <typename StartIndices, typename Sizes, typename XprType>\
struct LeafCount<CVQual TensorSlicingOp<StartIndices, Sizes, XprType> >:CategoryCount<XprType>{};

SLICEOPLEAFCOUNT(const)
SLICEOPLEAFCOUNT()
#undef SLICEOPLEAFCOUNT

/// specialisation of the \ref LeafCount struct when the node type is TensorReshapingOp or TensorShufflingOp
#define RESHAPEANDSHUFFLELEAFCOUNT(OPEXPR, CVQual)\
template<typename Param, typename XprType>\
struct LeafCount<CVQual OPEXPR<Param, XprType> >:CategoryCount<XprType>{};

RESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, const)
RESHAPEANDSHUFFLELEAFCOUNT(TensorReshapingOp, )

RESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, const)
RESHAPEANDSHUFFLELEAFCOUNT(TensorShufflingOp, )
#undef RESHAPEANDSHUFFLELEAFCOUNT

} /// namespace TensorSycl
} /// namespace internal
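A minimal, self-contained analogue (not Eigen's code) of the LeafCount/CategoryCount recursion above: leaves count as one, interior nodes sum their children, and forced-evaluation-style nodes reset the count to one because their result lives in a single device buffer. Names (Leaf, Node, CountLeaves) are illustrative.

#include <cstddef>

struct Leaf {};
template <typename... Children> struct Node {};

// A bare type counts as one leaf unless a specialisation says otherwise.
template <typename T> struct CountLeaves { static const std::size_t Count = 1; };
template <> struct CountLeaves<Node<> > { static const std::size_t Count = 0; };
template <typename C, typename... Cs>
struct CountLeaves<Node<C, Cs...> > {
  static const std::size_t Count =
      CountLeaves<C>::Count + CountLeaves<Node<Cs...> >::Count;
};

static_assert(CountLeaves<Node<Leaf, Node<Leaf, Leaf> > >::Count == 3, "three leaves");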
@@ -180,6 +180,18 @@ SLICEOPEXPR(const)
SLICEOPEXPR()
#undef SLICEOPEXPR

#define RESHAPEANDSHUFFLEOPPLH(OPEXP, CVQual)\
template<typename Param, typename XprType, size_t N>\
struct PlaceHolderExpression<CVQual OPEXP<Param, XprType>, N > {\
  typedef CVQual OPEXP<Param, typename CalculateIndex<N, XprType>::ArgType> Type;\
};

RESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, const)
RESHAPEANDSHUFFLEOPPLH(TensorReshapingOp, )

RESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, const)
RESHAPEANDSHUFFLEOPPLH(TensorShufflingOp, )
#undef RESHAPEANDSHUFFLEOPPLH
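A hedged, self-contained analogue (not Eigen's code) of what PlaceHolderExpression produces: the expression's leaf types are rewritten into numbered placeholders so a kernel can later rebind each leaf to a device accessor by index. Names (Leaf, PlaceHolder, Pair, Rewrite) are illustrative, and the two-leaf numbering is hard-coded for brevity.

#include <cstddef>
#include <type_traits>

struct Leaf {};
template <std::size_t N> struct PlaceHolder {};
template <typename L, typename R> struct Pair {};

// Rewrite a leaf into the placeholder carrying its index.
template <typename T, std::size_t N> struct Rewrite { typedef PlaceHolder<N> Type; };
// Recurse into interior nodes; here the right child is assumed to hold one leaf.
template <typename L, typename R, std::size_t N>
struct Rewrite<Pair<L, R>, N> {
  typedef Pair<typename Rewrite<L, N - 1>::Type,
               typename Rewrite<R, N>::Type> Type;
};

static_assert(std::is_same<Rewrite<Pair<Leaf, Leaf>, 1>::Type,
                           Pair<PlaceHolder<0>, PlaceHolder<1> > >::value,
              "leaves numbered right-to-left");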
/// template deduction for \ref PlaceHolderExpression struct
template <typename Expr>
@@ -54,7 +54,7 @@ void run(Expr &expr, Dev &dev) {
  }
  });
});
dev.sycl_queue().throw_asynchronous();
dev.synchronize();
}

evaluator.cleanup();
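A note on the one-line change above: cl::sycl::queue::throw_asynchronous() only rethrows errors that have already been reported and does not wait for queued kernels, so a host-side read issued right after run() could race with a kernel still executing. dev.synchronize() blocks until the submitted work completes and surfaces any asynchronous error first, consistent with the synchronize()-based copies elsewhere in this patch. A hedged sketch of the resulting ordering, with host_out, device_ptr, and n as hypothetical names:

TensorSycl::run(expr, dev);                      // submits the kernel, then blocks
dev.memcpyDeviceToHost(host_out, device_ptr, n); // safe: kernel has completed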
@@ -147,6 +147,7 @@ if(EIGEN_TEST_CXX11)
  ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
  ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
  ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
  ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11")
  ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
endif(EIGEN_TEST_SYCL)
# It should be safe to always run these tests as there is some fallback code for
@@ -28,6 +28,112 @@ using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;

template <typename DataType, int DataLayout>
static void test_simple_reshape(const Eigen::SyclDevice& sycl_device)
{
  typename Tensor<DataType, 5 ,DataLayout>::Dimensions dim1(2,3,1,7,1);
  typename Tensor<DataType, 3 ,DataLayout>::Dimensions dim2(2,3,7);
  typename Tensor<DataType, 2 ,DataLayout>::Dimensions dim3(6,7);
  typename Tensor<DataType, 2 ,DataLayout>::Dimensions dim4(2,21);

  Tensor<DataType, 5, DataLayout> tensor1(dim1);
  Tensor<DataType, 3, DataLayout> tensor2(dim2);
  Tensor<DataType, 2, DataLayout> tensor3(dim3);
  Tensor<DataType, 2, DataLayout> tensor4(dim4);

  tensor1.setRandom();

  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
  DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor3.size()*sizeof(DataType)));
  DataType* gpu_data4 = static_cast<DataType*>(sycl_device.allocate(tensor4.size()*sizeof(DataType)));

  TensorMap<Tensor<DataType, 5,DataLayout>> gpu1(gpu_data1, dim1);
  TensorMap<Tensor<DataType, 3,DataLayout>> gpu2(gpu_data2, dim2);
  TensorMap<Tensor<DataType, 2,DataLayout>> gpu3(gpu_data3, dim3);
  TensorMap<Tensor<DataType, 2,DataLayout>> gpu4(gpu_data4, dim4);

  sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));

  gpu2.device(sycl_device)=gpu1.reshape(dim2);
  sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));

  gpu3.device(sycl_device)=gpu1.reshape(dim3);
  sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType));

  gpu4.device(sycl_device)=gpu1.reshape(dim2).reshape(dim4);
  sycl_device.memcpyDeviceToHost(tensor4.data(), gpu_data4,(tensor4.size())*sizeof(DataType));

  for (int i = 0; i < 2; ++i){
    for (int j = 0; j < 3; ++j){
      for (int k = 0; k < 7; ++k){
        // Collapsing the size-1 dimensions gives the same indexing in both layouts.
        VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k));
        if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k)); /// ColMajor
          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k)); /// ColMajor
        }
        else{
          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j*7 +k)); /// RowMajor
          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i*3 +j,k)); /// RowMajor
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data1);
  sycl_device.deallocate(gpu_data2);
  sycl_device.deallocate(gpu_data3);
  sycl_device.deallocate(gpu_data4);
}

template<typename DataType, int DataLayout>
static void test_reshape_as_lvalue(const Eigen::SyclDevice& sycl_device)
{
  typename Tensor<DataType, 3, DataLayout>::Dimensions dim1(2,3,7);
  typename Tensor<DataType, 2, DataLayout>::Dimensions dim2(6,7);
  typename Tensor<DataType, 5, DataLayout>::Dimensions dim3(2,3,1,7,1);
  Tensor<DataType, 3, DataLayout> tensor(dim1);
  Tensor<DataType, 2, DataLayout> tensor2d(dim2);
  Tensor<DataType, 5, DataLayout> tensor5d(dim3);

  tensor.setRandom();

  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2d.size()*sizeof(DataType)));
  DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor5d.size()*sizeof(DataType)));

  TensorMap< Tensor<DataType, 3, DataLayout> > gpu1(gpu_data1, dim1);
  TensorMap< Tensor<DataType, 2, DataLayout> > gpu2(gpu_data2, dim2);
  TensorMap< Tensor<DataType, 5, DataLayout> > gpu3(gpu_data3, dim3);

  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));

  gpu2.reshape(dim1).device(sycl_device)=gpu1;
  sycl_device.memcpyDeviceToHost(tensor2d.data(), gpu_data2,(tensor2d.size())*sizeof(DataType));

  gpu3.reshape(dim1).device(sycl_device)=gpu1;
  sycl_device.memcpyDeviceToHost(tensor5d.data(), gpu_data3,(tensor5d.size())*sizeof(DataType));

  for (int i = 0; i < 2; ++i){
    for (int j = 0; j < 3; ++j){
      for (int k = 0; k < 7; ++k){
        VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k));
        if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
          VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k)); /// ColMajor
        }
        else{
          VERIFY_IS_EQUAL(tensor2d(i*3 +j,k),tensor(i,j,k)); /// RowMajor
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data1);
  sycl_device.deallocate(gpu_data2);
  sycl_device.deallocate(gpu_data3);
}

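A host-only sanity sketch (not part of the test file) of the reshape invariant the tests above exercise: reshape reinterprets the same linear buffer under new dimensions without moving data. It uses only the public Tensor API; the default layout is ColMajor, so the linear index of (i,j,k) in a 2x3x7 tensor is i + 2*j + 6*k.

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 3> t(2, 3, 7);
  t.setRandom();
  Eigen::array<Eigen::DenseIndex, 2> flat = {{6, 7}};
  Eigen::Tensor<float, 2> r = t.reshape(flat);  // same 42 floats, new shape
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
      for (int k = 0; k < 7; ++k)
        assert(r(i + 2*j, k) == t(i, j, k));    // ColMajor linear index preserved
  return 0;
}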
template <typename DataType, int DataLayout>
static void test_simple_slice(const Eigen::SyclDevice &sycl_device)
{
@@ -74,15 +74,19 @@ static void test_simple_slice(const Eigen::SyclDevice &sycl_device)
  sycl_device.deallocate(gpu_data3);
}

template<typename DataType, typename dev_Selector> void sycl_slicing_test_per_device(dev_Selector s){
template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_device(dev_Selector s){
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_simple_slice<DataType, RowMajor>(sycl_device);
  test_simple_slice<DataType, ColMajor>(sycl_device);
  test_simple_reshape<DataType, RowMajor>(sycl_device);
  test_simple_reshape<DataType, ColMajor>(sycl_device);
  test_reshape_as_lvalue<DataType, RowMajor>(sycl_device);
  test_reshape_as_lvalue<DataType, ColMajor>(sycl_device);
}
void test_cxx11_tensor_morphing_sycl()
{
  for (const auto& device :Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_slicing_test_per_device<float>(device));
    CALL_SUBTEST(sycl_morphing_test_per_device<float>(device));
  }
}
unsupported/test/cxx11_tensor_shuffling_sycl.cpp (new file, 120 lines)
@@ -0,0 +1,120 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
// Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_shuffling_sycl
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;

template <typename DataType, int DataLayout, typename IndexTypes>
static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device)
{
  IndexTypes sizeDim1 = 2;
  IndexTypes sizeDim2 = 3;
  IndexTypes sizeDim3 = 5;
  IndexTypes sizeDim4 = 7;
  array<IndexTypes, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, DataLayout,IndexTypes> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout,IndexTypes> no_shuffle(tensorRange);
  tensor.setRandom();

  const size_t buffSize =tensor.size()*sizeof(DataType);
  array<IndexTypes, 4> shuffles;
  // identity permutation: the output must match the input exactly
  shuffles[0] = 0;
  shuffles[1] = 1;
  shuffles[2] = 2;
  shuffles[3] = 3;
  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(buffSize));
  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(buffSize));

  TensorMap<Tensor<DataType, 4, DataLayout,IndexTypes>> gpu1(gpu_data1, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout,IndexTypes>> gpu2(gpu_data2, tensorRange);

  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize);

  gpu2.device(sycl_device)=gpu1.shuffle(shuffles);
  sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize);

  VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(no_shuffle.dimension(2), sizeDim3);
  VERIFY_IS_EQUAL(no_shuffle.dimension(3), sizeDim4);

  for (int i = 0; i < sizeDim1; ++i) {
    for (int j = 0; j < sizeDim2; ++j) {
      for (int k = 0; k < sizeDim3; ++k) {
        for (int l = 0; l < sizeDim4; ++l) {
          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
        }
      }
    }
  }

  // non-trivial permutation: output dimension d comes from input dimension shuffles[d]
  shuffles[0] = 2;
  shuffles[1] = 3;
  shuffles[2] = 1;
  shuffles[3] = 0;
  array<IndexTypes, 4> tensorrangeShuffle = {{sizeDim3, sizeDim4, sizeDim2, sizeDim1}};
  Tensor<DataType, 4, DataLayout,IndexTypes> shuffle(tensorrangeShuffle);
  DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(buffSize));
  TensorMap<Tensor<DataType, 4,DataLayout,IndexTypes>> gpu3(gpu_data3, tensorrangeShuffle);

  gpu3.device(sycl_device)=gpu1.shuffle(shuffles);
  sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize);

  VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3);
  VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4);
  VERIFY_IS_EQUAL(shuffle.dimension(2), sizeDim2);
  VERIFY_IS_EQUAL(shuffle.dimension(3), sizeDim1);

  for (int i = 0; i < sizeDim1; ++i) {
    for (int j = 0; j < sizeDim2; ++j) {
      for (int k = 0; k < sizeDim3; ++k) {
        for (int l = 0; l < sizeDim4; ++l) {
          VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
        }
      }
    }
  }

  // release the device buffers (missing in the original; added to avoid leaking them)
  sycl_device.deallocate(gpu_data1);
  sycl_device.deallocate(gpu_data2);
  sycl_device.deallocate(gpu_data3);
}

template<typename DataType, typename dev_Selector> void sycl_shuffling_test_per_device(dev_Selector s){
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_simple_shuffling_sycl<DataType, RowMajor, int>(sycl_device);
  test_simple_shuffling_sycl<DataType, ColMajor, int>(sycl_device);

  test_simple_shuffling_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_simple_shuffling_sycl<DataType, ColMajor, int64_t>(sycl_device);
}
void test_cxx11_tensor_shuffling_sycl()
{
  for (const auto& device :Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_shuffling_test_per_device<float>(device));
  }
}
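A host-only sanity sketch (not part of the new test file) of the shuffle semantics verified above: dimension d of the output takes input dimension shuffles[d], and each output coordinate reads the correspondingly permuted input coordinate.

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 2> in(2, 3);
  in.setRandom();
  Eigen::array<int, 2> perm = {{1, 0}};            // output dim d <- input dim perm[d]
  Eigen::Tensor<float, 2> out = in.shuffle(perm);  // evaluated on the host
  assert(out.dimension(0) == 3 && out.dimension(1) == 2);
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j)
      assert(out(j, i) == in(i, j));               // permuted coordinates agree
  return 0;
}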
@@ -197,7 +197,6 @@ template<typename DataType, typename dev_Selector> void sycl_computing_test_per_
  test_sycl_computations<DataType, ColMajor>(sycl_device);
}
void test_cxx11_tensor_sycl() {
  auto devices =Eigen::get_sycl_supported_devices();
  for (const auto& device :Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_computing_test_per_device<float>(device));
  }
}