[SYCL] This PR adds the minimal set of modifications to the Eigen unsupported module required to run it on devices that support SYCL.

* Abstracting the pointer type so that both SYCL memory objects and raw pointers can be captured.
* Converting the SYCL virtual pointer to SYCL device memory inside the Eigen evaluator classes.
* Binding the SYCL placeholder accessors to the command group handler via a bind() method on each Eigen evaluator node (a minimal sketch of this pattern follows this list).
* Adding a SYCL-aware macro for controlling loop unrolling.
* Modifying TensorDeviceSycl.h and the SYCL executor method to adopt the above changes.
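As background for the first three bullets, here is a minimal host-only C++ sketch of the pattern. All names (StorageMemoryLike, SyclPointerLike, MockHandler, and the evaluator structs) are hypothetical stand-ins for illustration only; they are not the types introduced by this patch. A device-dependent storage trait selects the evaluator pointer type, and every evaluator node exposes a bind() hook that forwards the SYCL command group handler to its children.

// Hypothetical sketch, not Eigen code.
struct MockHandler {};         // stands in for cl::sycl::handler
struct HostDevice {};          // stands in for Eigen::DefaultDevice
struct MockSyclDevice {};      // stands in for Eigen::SyclDevice

// Bullet 1: the evaluator pointer type is abstracted per device. On the host it
// is a plain raw pointer; on a SYCL device it would wrap the virtual pointer /
// placeholder accessor.
template <typename T> struct SyclPointerLike {
  T* virtual_ptr;                              // bullet 2: SYCL virtual pointer
  void bind(MockHandler& /*cgh*/) const {}     // would call cgh.require(accessor)
};
template <typename T, typename Device> struct StorageMemoryLike { typedef T* Type; };
template <typename T> struct StorageMemoryLike<T, MockSyclDevice> {
  typedef SyclPointerLike<T> Type;
};

// Bullet 3: each evaluator node forwards bind() to its children so that all
// placeholder accessors are registered before the kernel is launched.
template <typename T, typename Device>
struct LeafEvaluatorLike {
  typedef typename StorageMemoryLike<T, Device>::Type EvaluatorPointerType;
  EvaluatorPointerType m_data;
  bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }  // new signature
  void bind(MockHandler& /*cgh*/) const { /* m_data.bind(cgh) on SYCL */ }
};
template <typename LeftEval, typename RightEval>
struct AssignEvaluatorLike {
  LeftEval  m_leftImpl;
  RightEval m_rightImpl;
  void bind(MockHandler& cgh) const {
    m_leftImpl.bind(cgh);
    m_rightImpl.bind(cgh);
  }
};

int main() {
  typedef LeafEvaluatorLike<float, MockSyclDevice> Leaf;
  AssignEvaluatorLike<Leaf, Leaf> assign{Leaf{{nullptr}}, Leaf{{nullptr}}};
  MockHandler cgh;
  assign.bind(cgh);   // recursively binds both operands
  return 0;
}

On the host the abstraction degenerates to a raw pointer and bind() is a no-op; under SYCL the same call chain registers every placeholder accessor with the command group before launch.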
Mehdi Goli 2019-06-28 10:08:23 +01:00
parent 16a56b2ddd
commit 7d08fa805a
47 changed files with 1983 additions and 951 deletions
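For readers unfamiliar with the SYCL side of the accessor-binding bullet, the snippet below is a standalone SYCL 1.2.1 example (not part of this patch) of binding a placeholder accessor to a command group handler; Eigen's new bind(cgh) methods perform this kind of registration for every tensor operand of an expression.

#include <CL/sycl.hpp>
#include <vector>

int main() {
  std::vector<float> host(16, 1.0f);
  cl::sycl::queue q;
  cl::sycl::buffer<float, 1> buf(host.data(), cl::sycl::range<1>(host.size()));

  // Placeholder accessor: created without a handler.
  cl::sycl::accessor<float, 1, cl::sycl::access::mode::read_write,
                     cl::sycl::access::target::global_buffer,
                     cl::sycl::access::placeholder::true_t>
      acc(buf);

  q.submit([&](cl::sycl::handler& cgh) {
    cgh.require(acc);  // bind the placeholder accessor to this command group
    cgh.parallel_for<class scale_kernel>(
        cl::sycl::range<1>(host.size()),
        [=](cl::sycl::id<1> i) { acc[i] *= 2.0f; });
  });
  q.wait();
  return 0;
}

A placeholder accessor only becomes usable inside a kernel after cgh.require() has attached it to the submitted command group, which is exactly what the evaluators' bind() calls arrange for.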

View File

@@ -37,7 +37,7 @@ struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType>
 template<typename XprType>
 struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense>
 {
-  typedef const TensorIndexTupleOp<XprType>& type;
+  typedef const TensorIndexTupleOp<XprType>EIGEN_DEVICE_REF type;
 };
 template<typename XprType>
@@ -82,6 +82,8 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
   static const int NumDims = internal::array_size<Dimensions>::value;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
@@ -100,7 +102,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
     return m_impl.dimensions();
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -118,11 +120,11 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, 1);
   }
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
 #ifdef EIGEN_USE_SYCL
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const {
-    return m_impl;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
   }
 #endif
@@ -154,7 +156,7 @@ struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<XprType>
 template<typename ReduceOp, typename Dims, typename XprType>
 struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
 {
-  typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>& type;
+  typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>EIGEN_DEVICE_REF type;
 };
 template<typename ReduceOp, typename Dims, typename XprType>
@@ -216,6 +218,9 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
   typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions;
   static const int NumDims = internal::array_size<InputDimensions>::value;
   typedef array<Index, NumDims> StrideDims;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
+  typedef StorageMemory<TupleType, Device> TupleStorageMem;
   enum {
     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
@@ -231,9 +236,6 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
       : m_orig_impl(op.expression(), device),
         m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
         m_return_dim(op.return_dim())
-#ifdef EIGEN_USE_SYCL
-        ,m_device(device)
-#endif
   {
     gen_strides(m_orig_impl.dimensions(), m_strides);
     if (Layout == static_cast<int>(ColMajor)) {
@@ -243,14 +245,17 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
       const Index total_size = internal::array_prod(m_orig_impl.dimensions());
       m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size;
     }
-    m_stride_div = (m_return_dim >= 0) ? m_strides[m_return_dim] : 1;
+    // If m_return_dim is not a valid index, returns 1 or this can crash on Windows.
+    m_stride_div = ((m_return_dim >= 0) &&
+                    (m_return_dim < static_cast<Index>(m_strides.size())))
+                   ? m_strides[m_return_dim] : 1;
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
     return m_impl.dimensions();
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -263,16 +268,13 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
     return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div;
   }
-#ifndef EIGEN_USE_SYCL
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-#else // following functions are required by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TupleType* data() const { return m_impl.data(); }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index return_dim() const {return m_return_dim;}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StrideDims& strides() const {return m_strides;}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride_mod() const {return m_stride_mod;}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride_div() const {return m_stride_div;}
-  const Device& device() const{return m_device;}
-#endif
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+#ifdef EIGEN_USE_SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+    m_orig_impl.bind(cgh);
+  }
+#endif
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
   costPerCoeff(bool vectorized) const {
@@ -312,9 +314,6 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Device>
   StrideDims m_strides;
   Index m_stride_mod;
   Index m_stride_div;
-#ifdef EIGEN_USE_SYCL
-  const Device& m_device;
-#endif
 };
 } // end namespace Eigen

View File

@@ -97,6 +97,8 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   static const int NumDims = XprType::NumDims;
@@ -136,7 +138,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
     return m_rightImpl.dimensions();
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
     eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
     m_leftImpl.evalSubExprsIfNeeded(NULL);
     // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non
@@ -154,6 +156,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
     m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
     const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned;
     const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? Aligned : Unaligned;
     m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i));
@@ -199,13 +202,15 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
       m_leftImpl.writeBlock(*block);
     }
   }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_leftImpl.bind(cgh);
+    m_rightImpl.bind(cgh);
+  }
+#endif
-  /// required by sycl in order to extract the accessor
-  const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
-  /// required by sycl in order to extract the accessor
-  const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_leftImpl.data(); }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_leftImpl.data(); }
 private:
   TensorEvaluator<LeftArgType, Device> m_leftImpl;

View File

@@ -841,7 +841,7 @@ struct TensorBlockView {
   const Scalar* data() const { return m_data; }
 private:
-  const Device& m_device;
+  const Device EIGEN_DEVICE_REF m_device;
   Dimensions m_block_sizes, m_block_strides;
   const Scalar* m_data;  // Not owned.
   Scalar* m_allocated_data;  // Owned.

View File

@@ -37,7 +37,7 @@ struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType>
 template<typename Broadcast, typename XprType>
 struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense>
 {
-  typedef const TensorBroadcastingOp<Broadcast, XprType>& type;
+  typedef const TensorBroadcastingOp<Broadcast, XprType> EIGEN_DEVICE_REF type;
 };
 template<typename Broadcast, typename XprType>
@@ -105,7 +105,11 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+ protected: //  all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout;
   bool isCopy, nByOne, oneByN;
+ public:
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     IsAligned = true,
@@ -205,7 +209,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -238,6 +242,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   // TODO: attempt to speed this up. The integer divisions and modulo are slow
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexColMajor(Index index) const {
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = NumDims - 1; i > 0; --i) {
       const Index idx = index / m_outputStrides[i];
       if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -272,6 +277,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexRowMajor(Index index) const {
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < NumDims - 1; ++i) {
       const Index idx = index / m_outputStrides[i];
       if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -376,6 +382,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       values[0] = m_impl.coeff(inputIndex);
       return internal::pload1<PacketReturnType>(values);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
         if (outputOffset + cur < m_outputStrides[endDim]) {
           values[i] = m_impl.coeff(inputIndex);
@@ -410,6 +417,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       return m_impl.template packet<Unaligned>(inputIndex);
     } else {
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < PacketSize; ++i) {
         if (inputIndex > m_inputStrides[dim]-1) {
           inputIndex = 0;
@@ -441,6 +449,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       values[0] = m_impl.coeff(inputIndex);
       return internal::pload1<PacketReturnType>(values);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
         if (outputOffset + cur < m_outputStrides[dim]) {
           values[i] = m_impl.coeff(inputIndex);
@@ -465,6 +474,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     const Index originalIndex = index;
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = NumDims - 1; i > 0; --i) {
       const Index idx = index / m_outputStrides[i];
       if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -500,6 +510,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     } else {
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       values[0] = m_impl.coeff(inputIndex);
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < PacketSize; ++i) {
         if (innermostLoc + i < m_impl.dimensions()[0]) {
           values[i] = m_impl.coeff(inputIndex+i);
@@ -521,6 +532,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     const Index originalIndex = index;
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < NumDims - 1; ++i) {
       const Index idx = index / m_outputStrides[i];
       if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -556,6 +568,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     } else {
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       values[0] = m_impl.coeff(inputIndex);
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < PacketSize; ++i) {
         if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) {
           values[i] = m_impl.coeff(inputIndex+i);
@@ -572,6 +585,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   costPerCoeff(bool vectorized) const {
     double compute_cost = TensorOpCost::AddCost<Index>();
     if (!isCopy && NumDims > 0) {
+      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        compute_cost += TensorOpCost::DivCost<Index>();
        if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -845,12 +859,17 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     }
   }
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
   Broadcast functor() const { return m_broadcast; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif
 private:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void BroadcastBlock(
       const Dimensions& input_block_sizes,
@@ -874,9 +893,9 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     BroadcastTensorBlockReader::Run(&broadcast_block, input_block.data());
   }
 protected:
-  const Device& m_device;
-  const Broadcast m_broadcast;
+  const Device EIGEN_DEVICE_REF m_device;
+  const typename internal::remove_reference<Broadcast>::type m_broadcast;
   Dimensions m_dimensions;
   array<Index, NumDims> m_outputStrides;
   array<Index, NumDims> m_inputStrides;

View File

@@ -38,7 +38,7 @@ struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
 template<DenseIndex DimId, typename XprType>
 struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
 {
-  typedef const TensorChippingOp<DimId, XprType>& type;
+  typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
 };
 template<DenseIndex DimId, typename XprType>
@@ -139,7 +139,8 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     // Alignment can't be guaranteed at compile time since it depends on the
@@ -169,7 +170,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
       OutputTensorBlock;
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device), m_offset(op.offset())
+      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
   {
     EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
     eigen_assert(NumInputDims > m_dim.actualDim());
@@ -218,7 +219,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -243,6 +244,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
       eigen_assert(m_stride == 1);
       Index inputIndex = index * m_inputStride + m_inputOffset;
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
@@ -262,6 +264,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     } else {
       // Cross the stride boundary. Fallback to slow path.
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < PacketSize; ++i) {
         values[i] = coeff(index);
         ++index;
@@ -349,26 +352,20 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     m_impl.block(&input_block);
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
-    CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
+    typename Storage::Type result = constCast(m_impl.data());
     if (IsOuterChipping && result) {
       return result + m_inputOffset;
     } else {
       return NULL;
     }
   }
+#ifdef EIGEN_USE_SYCL
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex dimId() const {
-    return m_dim.actualDim();
-  }
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DenseIndex& offset() const {
-    return m_offset;
-  }
-  /// required by sycl in order to extract the accessor
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
 protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
@@ -399,10 +396,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   DSizes<Index, NumInputDims> m_inputStrides;
   TensorEvaluator<ArgType, Device> m_impl;
   const internal::DimensionId<DimId> m_dim;
-  const Device& m_device;
+  const Device EIGEN_DEVICE_REF m_device;
-  // required by sycl
-  const DenseIndex m_offset;
 };
@@ -466,6 +460,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
     internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
     Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < PacketSize; ++i) {
       this->m_impl.coeffRef(inputIndex) = values[i];
       inputIndex += this->m_inputStride;
@@ -484,6 +479,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
     // Cross stride boundary. Fallback to slow path.
     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
     internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < PacketSize; ++i) {
       this->coeffRef(index) = values[i];
       ++index;

View File

@@ -119,6 +119,8 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     IsAligned = false,
     PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
@@ -181,7 +183,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
   // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear?
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
   {
     m_leftImpl.evalSubExprsIfNeeded(NULL);
     m_rightImpl.evalSubExprsIfNeeded(NULL);
@@ -219,11 +221,13 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
     Index left_index;
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
       left_index = subs[0];
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < NumDims; ++i) {
         left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
       }
     } else {
       left_index = subs[NumDims - 1];
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 2; i >= 0; --i) {
         left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
       }
@@ -235,11 +239,13 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
     Index right_index;
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
       right_index = subs[0];
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < NumDims; ++i) {
         right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
       }
     } else {
       right_index = subs[NumDims - 1];
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 2; i >= 0; --i) {
         right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
       }
@@ -257,6 +263,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
     eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
     EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < packetSize; ++i) {
       values[i] = coeff(index+i);
     }
@@ -279,13 +286,15 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
            TensorOpCost(0, 0, compute_cost);
   }
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
-  /// required by sycl in order to extract the accessor
-  const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
-  /// required by sycl in order to extract the accessor
-  const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
-  /// required by sycl in order to extract the accessor
-  const Axis& axis() const { return m_axis; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_leftImpl.bind(cgh);
+    m_rightImpl.bind(cgh);
+  }
+#endif
 protected:
   Dimensions m_dimensions;

View File

@@ -433,6 +433,8 @@ struct TensorContractionEvaluatorBase
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+  typedef StorageMemory<Scalar, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     IsAligned = true,
@@ -453,6 +455,9 @@ struct TensorContractionEvaluatorBase
   typedef typename internal::conditional<
     static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
+  typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluatorType;
+  typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluatorType;
   static const int LDims =
       internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
   static const int RDims =
@@ -653,14 +658,14 @@ struct TensorContractionEvaluatorBase
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar * data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
     m_leftImpl.evalSubExprsIfNeeded(NULL);
     m_rightImpl.evalSubExprsIfNeeded(NULL);
     if (data) {
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
+      m_result = static_cast<EvaluatorPointerType>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
       evalTo(m_result);
       return true;
     }
@@ -934,7 +939,7 @@ struct TensorContractionEvaluatorBase
     return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return m_result; }
 protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void EnableXSMMIfPossible(const array<IndexPair<Index>, ContractDims>& eval_op_indices) {
@@ -1169,9 +1174,9 @@ protected:
   TensorEvaluator<EvalLeftArgType, Device> m_leftImpl;
   TensorEvaluator<EvalRightArgType, Device> m_rightImpl;
-  const Device& m_device;
+  const Device EIGEN_DEVICE_REF m_device;
   OutputKernelType m_output_kernel;
-  Scalar* m_result;
+  EvaluatorPointerType m_result;
   bool m_can_use_xsmm;
 };

View File

@@ -59,6 +59,13 @@ struct CoeffLoader {
     return m_tensor.template packet<LoadMode>(index);
   }
+#ifdef EIGEN_USE_SYCL
+  // The placeholder accessors require to be bound to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_tensor.bind(cgh);
+  }
+#endif
 private:
   const Tensor m_tensor;
 };
@@ -87,6 +94,13 @@ struct CoeffLoader<Tensor, true, MakePointer_> {
   {
     return internal::ploadt_ro<typename Tensor::PacketReturnType, LoadMode>(m_data + index);
   }
+#ifdef EIGEN_USE_SYCL
+  // The placeholder accessors require to be bound to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_data.bind(cgh);
+  }
+#endif
 private:
   typedef typename Tensor::Scalar Scalar;
@@ -139,6 +153,7 @@ class SimpleTensorContractionMapper {
     EIGEN_UNUSED_VARIABLE(left); // annoying bug in g++8.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85963
     Index nocontract_val = left ? row : col;
     Index linidx = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
       const Index idx = nocontract_val / m_ij_strides[i];
       linidx += idx * m_nocontract_strides[i];
@@ -155,6 +170,7 @@ class SimpleTensorContractionMapper {
     Index contract_val = left ? col : row;
     if(array_size<contract_t>::value > 0) {
+      EIGEN_UNROLL_LOOP
      for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
        const Index idx = contract_val / m_k_strides[i];
        linidx += idx * m_contract_strides[i];
@@ -179,6 +195,7 @@ class SimpleTensorContractionMapper {
     Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
     Index linidx[2] = {0, 0};
     if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
+      EIGEN_UNROLL_LOOP
       for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
         const Index idx0 = nocontract_val[0] / m_ij_strides[i];
         const Index idx1 = nocontract_val[1] / m_ij_strides[i];
@@ -199,6 +216,7 @@ class SimpleTensorContractionMapper {
     Index contract_val[2] = {left ? col : row, left ? col : row + distance};
     if (array_size<contract_t>::value> 0) {
+      EIGEN_UNROLL_LOOP
       for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
         const Index idx0 = contract_val[0] / m_k_strides[i];
         const Index idx1 = contract_val[1] / m_k_strides[i];
@@ -230,6 +248,13 @@ class SimpleTensorContractionMapper {
     return ((side == Lhs) && inner_dim_contiguous && array_size<contract_t>::value > 0) ? m_contract_strides[0] : 1;
   }
+#ifdef EIGEN_USE_SYCL
+  // The placeholder accessors require to be bound to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_tensor.bind(cgh);
+  }
+#endif
   const CoeffLoader<Tensor, Tensor::RawAccess, MakePointer_>& tensor() const {
     return m_tensor;
   }
@@ -302,6 +327,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar,
     EIGEN_ALIGN_MAX Scalar data[packet_size];
     data[0] = this->m_tensor.coeff(first);
+    EIGEN_UNROLL_LOOP
     for (Index k = 1; k < packet_size - 1; k += 2) {
       const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1);
       data[k] = this->m_tensor.coeff(internal_pair.first);
@@ -472,6 +498,13 @@ class TensorContractionSubMapper {
     return false;
   }
+#ifdef EIGEN_USE_SYCL
+  // The placeholder accessors require to be bound to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_base_mapper.bind(cgh);
+  }
+#endif
   const ParentMapper& base_mapper() const { return m_base_mapper; }
   Index vert_offset() const { return m_vert_offset; }
   Index horiz_offset() const { return m_horiz_offset; }
@@ -515,6 +548,22 @@ class TensorContractionInputMapper
 };
+template <typename T> struct TensorContractionInputMapperTrait;
+template<typename Scalar_, typename Index_, int side_,
+         typename Tensor_,
+         typename nocontract_t_, typename contract_t_,
+         int packet_size_,
+         bool inner_dim_contiguous_, bool inner_dim_reordered_, int Alignment_, template <class> class MakePointer_>
+struct TensorContractionInputMapperTrait<TensorContractionInputMapper<Scalar_, Index_, side_, Tensor_,
+    nocontract_t_, contract_t_, packet_size_, inner_dim_contiguous_,
+    inner_dim_reordered_, Alignment_, MakePointer_> > {
+  typedef Tensor_ XprType;
+  static const bool inner_dim_contiguous = inner_dim_contiguous_;
+  static const bool inner_dim_reordered = inner_dim_reordered_;
+};
 } // end namespace internal
 } // end namespace Eigen

View File

@@ -129,6 +129,7 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
     typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
     internal::scalar_cast_op<SrcType, TgtType> converter;
     EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < TgtPacketSize; ++i) {
       values[i] = converter(m_impl.coeff(index+i));
     }
@@ -164,15 +165,15 @@ class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>
     typename XprType::Nested m_xpr;
 };
-template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
+template <bool SameType, typename Eval, typename EvalPointerType> struct ConversionSubExprEval {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
     impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
 };
-template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
+template <typename Eval, typename EvalPointerType> struct ConversionSubExprEval<true, Eval, EvalPointerType> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) {
     return impl.evalSubExprsIfNeeded(data);
   }
 };
@@ -207,6 +208,7 @@ struct PacketConv {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
     internal::scalar_cast_op<SrcType, TargetType> converter;
     EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < PacketSize; ++i) {
       values[i] = converter(impl.coeff(index+i));
     }
@@ -267,10 +269,18 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
   typedef typename PacketType<SrcType, Device>::type PacketSourceType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   static const bool IsSameType = internal::is_same<TargetType, SrcType>::value;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     IsAligned = false,
-    PacketAccess = true,
+    PacketAccess =
+    #ifndef EIGEN_USE_SYCL
+                   true,
+    #else
+                   TensorEvaluator<ArgType, Device>::PacketAccess &
+                   internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
+    #endif
     BlockAccess = false,
     PreferBlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
@@ -284,9 +294,9 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
   {
-    return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
+    return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
@@ -330,10 +340,16 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
       }
     }
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
   /// required by sycl in order to extract the sycl accessor
   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif
 protected:
   TensorEvaluator<ArgType, Device> m_impl;

View File

@@ -303,6 +303,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef StorageMemory<Scalar, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
@@ -469,7 +471,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
                            PacketSize));
   }
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
 private:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
@@ -525,7 +527,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
       m_local_kernel = false;
     } else {
       size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
-      Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
+      Scalar* local = (Scalar*)m_device.allocate_temp(kernel_sz);
       typedef TensorEvalToOp<const KernelArgType> EvalTo;
       EvalTo evalToTmp(local, m_kernelArg);
       const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
@@ -548,7 +550,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelArgType>, Device>
   KernelArgType m_kernelArg;
   const Scalar* m_kernel;
   bool m_local_kernel;
-  const Device& m_device;
+  const Device EIGEN_DEVICE_REF m_device;
 };

View File

@ -36,7 +36,7 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
template<typename CustomUnaryFunc, typename XprType> template<typename CustomUnaryFunc, typename XprType>
struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense> struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
{ {
typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type; typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>EIGEN_DEVICE_REF type;
}; };
template<typename CustomUnaryFunc, typename XprType> template<typename CustomUnaryFunc, typename XprType>
@ -88,7 +88,9 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename PointerType<CoeffReturnType, Device>::Type PointerT; typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -108,20 +110,20 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (data) { if (data) {
evalTo(data); evalTo(data);
return false; return false;
} else { } else {
m_result = static_cast<PointerT>( m_result = static_cast<EvaluatorPointerType>(m_device.get( (CoeffReturnType*)
m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))));
evalTo(m_result); evalTo(m_result);
return true; return true;
} }
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
if (m_result != NULL) { if (m_result) {
m_device.deallocate_temp(m_result); m_device.deallocate_temp(m_result);
m_result = NULL; m_result = NULL;
} }
@ -141,22 +143,25 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC PointerT data() const { return m_result; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; }
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } // binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_result.bind(cgh);
}
#endif #endif
protected: protected:
EIGEN_DEVICE_FUNC void evalTo(PointerT data) { EIGEN_DEVICE_FUNC void evalTo(EvaluatorPointerType data) {
TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions); TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(m_device.get(data), m_dimensions);
m_op.func().eval(m_op.expression(), result, m_device); m_op.func().eval(m_op.expression(), result, m_device);
} }
Dimensions m_dimensions; Dimensions m_dimensions;
const ArgType m_op; const ArgType m_op;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
PointerT m_result; EvaluatorPointerType m_result;
}; };
@ -251,7 +256,10 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -271,12 +279,13 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (data) { if (data) {
evalTo(data); evalTo(data);
return false; return false;
} else { } else {
m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType))); m_result = static_cast<EvaluatorPointerType>(m_device.get( (CoeffReturnType*)
m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType))));
evalTo(m_result); evalTo(m_result);
return true; return true;
} }
@ -303,22 +312,25 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC PointerT data() const { return m_result; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; }
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } // binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_result.bind(cgh);
}
#endif #endif
protected: protected:
EIGEN_DEVICE_FUNC void evalTo(PointerT data) { EIGEN_DEVICE_FUNC void evalTo(EvaluatorPointerType data) {
TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions); TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(m_device.get(data), m_dimensions);
m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
} }
Dimensions m_dimensions; Dimensions m_dimensions;
const XprType m_op; const XprType m_op;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
PointerT m_result; EvaluatorPointerType m_result;
}; };
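Both custom-op evaluators above follow the same pattern: the result buffer is typed as EvaluatorPointerType, temporaries come from the device's allocate_temp(), and anything that still needs a raw pointer (such as the TensorMap built inside evalTo()) goes through device.get(). A minimal host-side sketch of that pattern, using illustrative ToyStorage/ToyDevice stand-ins rather than Eigen's actual classes, and assuming a trivially constructible Scalar:

    #include <cstddef>
    #include <new>

    template <typename T>
    struct ToyStorage {            // stands in for StorageMemory<T, Device>
      typedef T* Type;             // EvaluatorPointerType on host devices
    };

    struct ToyDevice {             // stands in for DefaultDevice
      template <typename Type>
      Type get(Type data) const { return data; }   // identity on the host
      void* allocate_temp(std::size_t bytes) const { return ::operator new(bytes); }
      void deallocate_temp(void* ptr) const { ::operator delete(ptr); }
    };

    template <typename Scalar>
    struct ToyCustomEvaluator {
      typedef typename ToyStorage<Scalar>::Type EvaluatorPointerType;

      ToyCustomEvaluator(const ToyDevice& device, std::size_t size)
          : m_device(device), m_size(size), m_result(nullptr) {}

      // Mirrors evalSubExprsIfNeeded(): reuse the caller's buffer when one is
      // supplied, otherwise allocate a temporary through the device and own it.
      bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
        if (dest) { evalTo(dest); return false; }
        m_result = static_cast<EvaluatorPointerType>(m_device.get(
            static_cast<Scalar*>(m_device.allocate_temp(m_size * sizeof(Scalar)))));
        evalTo(m_result);
        return true;
      }

      void cleanup() {
        if (m_result) { m_device.deallocate_temp(m_result); m_result = nullptr; }
      }

     private:
      // Mirrors evalTo(): the raw pointer a TensorMap would need is obtained
      // through device.get().
      void evalTo(EvaluatorPointerType data) {
        Scalar* raw = m_device.get(data);
        for (std::size_t i = 0; i < m_size; ++i) raw[i] = Scalar(i);
      }

      const ToyDevice& m_device;
      std::size_t m_size;
      EvaluatorPointerType m_result;
    };

On a SYCL device the same calls resolve to the virtual-pointer machinery instead of the identities shown here.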

View File

@ -39,6 +39,10 @@ struct DefaultDevice {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
::memset(buffer, c, n); ::memset(buffer, c, n);
} }
template<typename Type>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const {
return data;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
#if !defined(EIGEN_GPU_COMPILE_PHASE) #if !defined(EIGEN_GPU_COMPILE_PHASE)
@ -54,7 +58,7 @@ struct DefaultDevice {
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__) #if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
// Running on the host CPU // Running on the host CPU
return l1CacheSize(); return l1CacheSize();
#elif defined(EIGEN_HIP_DEVICE_COMPILE) #elif defined(EIGEN_HIP_DEVICE_COMPILE)
@ -67,7 +71,7 @@ struct DefaultDevice {
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__) #if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
// Running single threaded on the host CPU // Running single threaded on the host CPU
return l3CacheSize(); return l3CacheSize();
#elif defined(EIGEN_HIP_DEVICE_COMPILE) #elif defined(EIGEN_HIP_DEVICE_COMPILE)
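On the default (host) device the new get() member is just the identity, but it is what lets the evaluator code above call device.get(ptr) unconditionally; only a device whose memory is not directly host-addressable gives it a non-trivial body. A hedged illustration of the two device families a single call site has to serve (HostLikeDevice, SyclLikeDevice and VirtualPointer are hypothetical stand-ins, not Eigen types):

    #include <cstddef>

    struct HostLikeDevice {
      template <typename Type>
      Type get(Type data) const { return data; }            // no translation needed
    };

    struct VirtualPointer { std::size_t offset; };          // hypothetical handle

    struct SyclLikeDevice {
      float* base;                                          // hypothetical mapping
      float* get(VirtualPointer p) const { return base + p.offset; }
    };

    // One generic call site serves both device families.
    template <typename Device, typename Ptr>
    auto resolve(const Device& dev, Ptr p) -> decltype(dev.get(p)) {
      return dev.get(p);
    }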

View File

@ -215,6 +215,10 @@ struct GpuDevice {
stream_->deallocate(buffer); stream_->deallocate(buffer);
} }
template<typename Type>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const {
return data;
}
EIGEN_STRONG_INLINE void* scratchpad() const { EIGEN_STRONG_INLINE void* scratchpad() const {
return stream_->scratchpad(); return stream_->scratchpad();

File diff suppressed because it is too large

View File

@ -76,6 +76,11 @@ struct ThreadPoolDevice {
deallocate(buffer); deallocate(buffer);
} }
template<typename Type>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const {
return data;
}
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
#ifdef __ANDROID__ #ifdef __ANDROID__
::memcpy(dst, src, n); ::memcpy(dst, src, n);

View File

@ -42,7 +42,6 @@ struct traits<TensorEvalToOp<XprType, MakePointer_> >
// Intermediate typedef to workaround MSVC issue. // Intermediate typedef to workaround MSVC issue.
typedef MakePointer_<T> MakePointerT; typedef MakePointer_<T> MakePointerT;
typedef typename MakePointerT::Type Type; typedef typename MakePointerT::Type Type;
typedef typename MakePointerT::RefType RefType;
}; };
@ -103,7 +102,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
@ -115,22 +116,16 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
}; };
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_device(device), : m_impl(op.expression(), device), m_buffer(device.get(op.buffer())), m_expression(op.expression()){}
m_buffer(op.buffer()), m_op(op), m_expression(op.expression())
{ }
// Used for accessor extraction in SYCL Managed TensorMap:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const {
return m_op;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
} }
typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_> >::template MakePointer<CoeffReturnType>::Type DevicePointer;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType scalar) {
EIGEN_UNUSED_VARIABLE(scalar); EIGEN_UNUSED_VARIABLE(scalar);
eigen_assert(scalar == NULL); eigen_assert(scalar == NULL);
return m_impl.evalSubExprsIfNeeded(m_buffer); return m_impl.evalSubExprsIfNeeded(m_buffer);
@ -165,19 +160,20 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_buffer; }
ArgType expression() const { return m_expression; } ArgType expression() const { return m_expression; }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
m_buffer.bind(cgh);
}
#endif
/// required by sycl in order to extract the accessor
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
/// added for sycl in order to construct the buffer from the sycl device
const Device& device() const{return m_device;}
private: private:
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device; EvaluatorPointerType m_buffer;
DevicePointer m_buffer;
const XprType& m_op;
const ArgType m_expression; const ArgType m_expression;
}; };
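The eval-to evaluator now keeps only the translated output buffer, and its bind() forwards to both the nested evaluator and that buffer. This is the convention the patch introduces everywhere: a composite node binds its own device-memory handles and then recurses into its children, so a single bind() call on the root evaluator registers every placeholder accessor before the kernel is submitted. A hedged sketch of that convention with simplified stand-in evaluators (the accessor plumbing below is plain SYCL, not Eigen's RangeAccess wrapper):

    #include <CL/sycl.hpp>

    using PlaceholderAcc =
        cl::sycl::accessor<float, 1, cl::sycl::access::mode::read_write,
                           cl::sycl::access::target::global_buffer,
                           cl::sycl::access::placeholder::true_t>;

    // A leaf node: binds only its own device-memory handle.
    struct LeafEvaluator {
      PlaceholderAcc acc;
      explicit LeafEvaluator(cl::sycl::buffer<float, 1>& b) : acc(b) {}
      void bind(cl::sycl::handler& cgh) const { cgh.require(acc); }
    };

    // A node shaped like the eval-to evaluator: child first, then its own buffer.
    struct EvalToLikeEvaluator {
      LeafEvaluator child;
      PlaceholderAcc buffer;
      EvalToLikeEvaluator(cl::sycl::buffer<float, 1>& in,
                          cl::sycl::buffer<float, 1>& out)
          : child(in), buffer(out) {}
      void bind(cl::sycl::handler& cgh) const {
        child.bind(cgh);       // forward to the nested expression
        cgh.require(buffer);   // then register this node's own output
      }
    };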

View File

@ -34,6 +34,9 @@ struct TensorEvaluator
typedef typename Derived::Dimensions Dimensions; typedef typename Derived::Dimensions Dimensions;
typedef Derived XprType; typedef Derived XprType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
// NumDimensions is -1 for variable dim tensors // NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
@ -60,16 +63,17 @@ struct TensorEvaluator
TensorBlockWriter; TensorBlockWriter;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
: m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
m_dims(m.dimensions()),
m_device(device)
{ } { }
// Used for accessor extraction in SYCL Managed TensorMap:
const Derived& derived() const { return m_impl; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) { if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize()); m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false; return false;
} }
return true; return true;
@ -78,14 +82,12 @@ struct TensorEvaluator
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data); eigen_assert(m_data != NULL);
return m_data[index]; return m_data[index];
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
typename internal::traits<Derived>::template MakePointer<Scalar>::RefType eigen_assert(m_data != NULL);
coeffRef(Index index) {
eigen_assert(m_data);
return m_data[index]; return m_data[index];
} }
@ -114,7 +116,7 @@ struct TensorEvaluator
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
eigen_assert(m_data); eigen_assert(m_data != NULL);
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
return m_data[m_dims.IndexOfColMajor(coords)]; return m_data[m_dims.IndexOfColMajor(coords)];
} else { } else {
@ -122,10 +124,9 @@ struct TensorEvaluator
} }
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
typename internal::traits<Derived>::template MakePointer<Scalar>::RefType
coeffRef(const array<DenseIndex, NumCoords>& coords) { coeffRef(const array<DenseIndex, NumCoords>& coords) {
eigen_assert(m_data); eigen_assert(m_data != NULL);
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
return m_data[m_dims.IndexOfColMajor(coords)]; return m_data[m_dims.IndexOfColMajor(coords)];
} else { } else {
@ -152,16 +153,18 @@ struct TensorEvaluator
TensorBlockWriter::Run(block, m_data); TensorBlockWriter::Run(block, m_data);
} }
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
/// required by sycl in order to construct sycl buffer from raw pointer
const Device& device() const{return m_device;}
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_data.bind(cgh);
}
#endif
protected: protected:
typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data; EvaluatorPointerType m_data;
Dimensions m_dims; Dimensions m_dims;
const Device& m_device; const Device m_device;
const Derived& m_impl;
}; };
namespace { namespace {
@ -184,6 +187,13 @@ Eigen::half loadConstant(const Eigen::half* address) {
return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
} }
#endif #endif
#ifdef EIGEN_USE_SYCL
// overload of load constant should be implemented here based on range access
template <cl::sycl::access::mode AcMd, typename T>
T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
return *address;
}
#endif
} }
@ -197,7 +207,9 @@ struct TensorEvaluator<const Derived, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename Derived::Dimensions Dimensions; typedef typename Derived::Dimensions Dimensions;
typedef const Derived XprType; typedef const Derived XprType;
typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
typedef StorageMemory<const Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
// NumDimensions is -1 for variable dim tensors // NumDimensions is -1 for variable dim tensors
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ? static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
@ -221,18 +233,15 @@ struct TensorEvaluator<const Derived, Device>
typename internal::remove_const<Scalar>::type, Index, NumCoords, Layout> typename internal::remove_const<Scalar>::type, Index, NumCoords, Layout>
TensorBlockReader; TensorBlockReader;
// Used for accessor extraction in SYCL Managed TensorMap:
const Derived& derived() const { return m_impl; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
: m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
{ } { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) { if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar)); m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
return false; return false;
} }
return true; return true;
@ -241,13 +250,8 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data); eigen_assert(m_data != NULL);
#ifndef __SYCL_DEVICE_ONLY__
return loadConstant(m_data+index); return loadConstant(m_data+index);
#else
CoeffReturnType tmp = m_data[index];
return tmp;
#endif
} }
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -269,7 +273,7 @@ struct TensorEvaluator<const Derived, Device>
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
eigen_assert(m_data); eigen_assert(m_data != NULL);
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords) const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
: m_dims.IndexOfRowMajor(coords); : m_dims.IndexOfRowMajor(coords);
return loadConstant(m_data+index); return loadConstant(m_data+index);
@ -288,16 +292,17 @@ struct TensorEvaluator<const Derived, Device>
TensorBlockReader::Run(block, m_data); TensorBlockReader::Run(block, m_data);
} }
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
#ifdef EIGEN_USE_SYCL
/// added for sycl in order to construct the buffer from the sycl device // binding placeholder accessors to a command group handler for SYCL
const Device& device() const{return m_device;} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_data.bind(cgh);
}
#endif
protected: protected:
typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data; EvaluatorPointerType m_data;
Dimensions m_dims; Dimensions m_dims;
const Device& m_device; const Device m_device;
const Derived& m_impl;
}; };
@ -310,16 +315,6 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{ {
typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType; typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
enum {
IsAligned = true,
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
TensorEvaluator(const XprType& op, const Device& device) TensorEvaluator(const XprType& op, const Device& device)
: m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
@ -331,10 +326,26 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum {
IsAligned = true,
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
#ifdef EIGEN_USE_SYCL
&& (PacketType<CoeffReturnType, Device>::size >1)
#endif
,
BlockAccess = false,
PreferBlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
RawAccess = false
};
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
@ -354,13 +365,14 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
PacketType<CoeffReturnType, Device>::size); PacketType<CoeffReturnType, Device>::size);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
/// required by sycl in order to extract the accessor
const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
/// required by sycl in order to extract the accessor
NullaryOp functor() const { return m_functor; }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_argImpl.bind(cgh);
}
#endif
private: private:
const NullaryOp m_functor; const NullaryOp m_functor;
@ -401,14 +413,15 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static const int NumDims = internal::array_size<Dimensions>::value; static const int NumDims = internal::array_size<Dimensions>::value;
typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout> typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
TensorBlock; TensorBlock;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_argImpl.evalSubExprsIfNeeded(NULL); m_argImpl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -456,16 +469,18 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
arg_block.data()); arg_block.data());
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
/// required by sycl in order to extract the accessor #ifdef EIGEN_USE_SYCL
const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; } // binding placeholder accessors to a command group handler for SYCL
/// added for sycl in order to construct the buffer from sycl device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const{
UnaryOp functor() const { return m_functor; } m_argImpl.bind(cgh);
}
#endif
private: private:
const Device& m_device; const Device m_device;
const UnaryOp m_functor; const UnaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl; TensorEvaluator<ArgType, Device> m_argImpl;
}; };
@ -509,6 +524,8 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions; typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
static const int NumDims = internal::array_size< static const int NumDims = internal::array_size<
typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value; typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
@ -524,7 +541,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
return m_leftImpl.dimensions(); return m_leftImpl.dimensions();
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_leftImpl.evalSubExprsIfNeeded(NULL); m_leftImpl.evalSubExprsIfNeeded(NULL);
m_rightImpl.evalSubExprsIfNeeded(NULL); m_rightImpl.evalSubExprsIfNeeded(NULL);
return true; return true;
@ -576,16 +593,17 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
right_block.block_strides(), right_block.data()); right_block.block_strides(), right_block.data());
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
/// required by sycl in order to extract the accessor
const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
/// required by sycl in order to extract the accessor
const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
/// required by sycl in order to extract the accessor
BinaryOp functor() const { return m_functor; }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_leftImpl.bind(cgh);
m_rightImpl.bind(cgh);
}
#endif
private: private:
const Device& m_device; const Device m_device;
const BinaryOp m_functor; const BinaryOp m_functor;
TensorEvaluator<LeftArgType, Device> m_leftImpl; TensorEvaluator<LeftArgType, Device> m_leftImpl;
TensorEvaluator<RightArgType, Device> m_rightImpl; TensorEvaluator<RightArgType, Device> m_rightImpl;
@ -639,6 +657,8 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions; typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{ {
@ -646,7 +666,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
return m_arg1Impl.dimensions(); return m_arg1Impl.dimensions();
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_arg1Impl.evalSubExprsIfNeeded(NULL); m_arg1Impl.evalSubExprsIfNeeded(NULL);
m_arg2Impl.evalSubExprsIfNeeded(NULL); m_arg2Impl.evalSubExprsIfNeeded(NULL);
m_arg3Impl.evalSubExprsIfNeeded(NULL); m_arg3Impl.evalSubExprsIfNeeded(NULL);
@ -679,14 +699,16 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
/// required by sycl in order to extract the accessor #ifdef EIGEN_USE_SYCL
const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; } // binding placeholder accessors to a command group handler for SYCL
/// required by sycl in order to extract the accessor EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; } m_arg1Impl.bind(cgh);
/// required by sycl in order to extract the accessor m_arg2Impl.bind(cgh);
const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; } m_arg3Impl.bind(cgh);
}
#endif
private: private:
const TernaryOp m_functor; const TernaryOp m_functor;
@ -731,6 +753,8 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions; typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
{ {
@ -738,7 +762,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
return m_condImpl.dimensions(); return m_condImpl.dimensions();
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_condImpl.evalSubExprsIfNeeded(NULL); m_condImpl.evalSubExprsIfNeeded(NULL);
m_thenImpl.evalSubExprsIfNeeded(NULL); m_thenImpl.evalSubExprsIfNeeded(NULL);
m_elseImpl.evalSubExprsIfNeeded(NULL); m_elseImpl.evalSubExprsIfNeeded(NULL);
@ -757,13 +781,15 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
template<int LoadMode> template<int LoadMode>
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
{ {
internal::Selector<PacketSize> select; internal::Selector<PacketSize> select;
for (Index i = 0; i < PacketSize; ++i) { EIGEN_UNROLL_LOOP
select.select[i] = m_condImpl.coeff(index+i); for (Index i = 0; i < PacketSize; ++i) {
} select.select[i] = m_condImpl.coeff(index+i);
return internal::pblend(select, }
m_thenImpl.template packet<LoadMode>(index), return internal::pblend(select,
m_elseImpl.template packet<LoadMode>(index)); m_thenImpl.template packet<LoadMode>(index),
m_elseImpl.template packet<LoadMode>(index));
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
@ -773,14 +799,16 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
.cwiseMax(m_elseImpl.costPerCoeff(vectorized)); .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
/// required by sycl in order to extract the accessor
const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; }
/// required by sycl in order to extract the accessor
const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; }
/// required by sycl in order to extract the accessor
const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_condImpl.bind(cgh);
m_thenImpl.bind(cgh);
m_elseImpl.bind(cgh);
}
#endif
private: private:
TensorEvaluator<IfArgType, Device> m_condImpl; TensorEvaluator<IfArgType, Device> m_condImpl;
TensorEvaluator<ThenArgType, Device> m_thenImpl; TensorEvaluator<ThenArgType, Device> m_thenImpl;
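All of the evaluators in this file now expose the same bind(cl::sycl::handler&) hook in place of the old impl()/functor() accessors. For a leaf that holds device memory, bind() ultimately amounts to registering a placeholder accessor with the command group before the kernel that dereferences it is enqueued. A self-contained SYCL sketch of that lifecycle (the buffer, kernel name and fill operation are illustrative):

    #include <CL/sycl.hpp>
    #include <cstddef>

    using rw_placeholder =
        cl::sycl::accessor<float, 1, cl::sycl::access::mode::read_write,
                           cl::sycl::access::target::global_buffer,
                           cl::sycl::access::placeholder::true_t>;

    void fill_ones(cl::sycl::queue& q, cl::sycl::buffer<float, 1>& buf,
                   std::size_t n) {
      rw_placeholder acc(buf);           // created outside any command group
      q.submit([&](cl::sycl::handler& cgh) {
        cgh.require(acc);                // the step bind() performs above
        cgh.parallel_for<class FillOnes>(
            cl::sycl::range<1>(n),
            [=](cl::sycl::id<1> i) { acc[i] = 1.0f; });
      });
      q.wait();
    }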

View File

@ -442,12 +442,133 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
// SYCL Executor policy // SYCL Executor policy
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
template <typename Expression, bool Vectorizable> template <bool Vectorizable, typename Evaluator>
class TensorExecutor<Expression, SyclDevice, Vectorizable> { struct ExecExprFunctorKernel_impl {
public: typedef typename Evaluator::Index Index;
static EIGEN_STRONG_INLINE void run(const Expression &expr, const SyclDevice &device) { const Index range;
// call TensorSYCL module const Index vectorizable_threads;
TensorSycl::run(expr, device); Evaluator evaluator;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ExecExprFunctorKernel_impl(
const Index range_, const Index vectorizable_threads_,
Evaluator evaluator_)
: range(range_), vectorizable_threads(vectorizable_threads_),
evaluator(evaluator_) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
operator()(cl::sycl::nd_item<1> itemID) {
Index gId = static_cast<Index>(itemID.get_global_linear_id());
Index total_threads = itemID.get_global_range(0);
EIGEN_UNROLL_LOOP
for (Index i = gId; i < range; i += total_threads) {
evaluator.evalScalar(i);
}
}
};
template <typename Evaluator>
struct ExecExprFunctorKernel_impl<true, Evaluator> {
typedef typename Evaluator::Index Index;
const Index range;
const Index vectorizable_threads;
Evaluator evaluator;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ExecExprFunctorKernel_impl(
const Index range_, const Index vectorizable_threads_,
Evaluator evaluator_)
: range(range_), vectorizable_threads(vectorizable_threads_),
evaluator(evaluator_) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
operator()(cl::sycl::nd_item<1> itemID) {
Index gId = static_cast<Index>(itemID.get_global_linear_id());
if (gId < vectorizable_threads) {
const Index PacketSize = Eigen::internal::unpacket_traits<
typename Evaluator::PacketReturnType>::size;
evaluator.evalPacket(gId * PacketSize);
gId += (vectorizable_threads * PacketSize);
EIGEN_UNROLL_LOOP
for (Index i = gId; i < range; i += vectorizable_threads) {
evaluator.evalScalar(i);
}
}
}
};
template <typename Expr, bool NonZeroVectoriseSize, typename Evaluator>
struct ExecExprFunctorKernel
: ExecExprFunctorKernel_impl<
::Eigen::internal::IsVectorizable<Eigen::SyclDevice, Expr>::value,
Evaluator> {
ExecExprFunctorKernel(const Index range_, const Index vectorizable_threads_,
const Evaluator &evaluator)
: ExecExprFunctorKernel_impl<
::Eigen::internal::IsVectorizable<Eigen::SyclDevice, Expr>::value,
Evaluator>(range_, vectorizable_threads_, evaluator) {}
};
template <typename Expr, typename Evaluator>
struct ExecExprFunctorKernel<Expr, false, Evaluator>
: ExecExprFunctorKernel_impl<false, Evaluator> {
ExecExprFunctorKernel(const Index range_, const Index vectorizable_threads_,
const Evaluator &evaluator)
: ExecExprFunctorKernel_impl<false, Evaluator>(
range_, vectorizable_threads_, evaluator) {}
};
template <typename Expression, bool Vectorizable, bool Tileable>
class TensorExecutor<Expression, Eigen::SyclDevice, Vectorizable, Tileable> {
public:
typedef typename Expression::Index Index;
static EIGEN_STRONG_INLINE void run(const Expression &expr, const Eigen::SyclDevice &dev) {
Eigen::TensorEvaluator<Expression, Eigen::SyclDevice> evaluator(expr, dev);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign) {
Index range, GRange, tileSize;
Index total_size = ::Eigen::internal::array_prod(evaluator.dimensions());
total_size = (total_size == 0) ? 1 : total_size;
const int PacketSize = Eigen::PacketType<
typename Eigen::TensorEvaluator<Expression, Eigen::SyclDevice>::CoeffReturnType,
Eigen::SyclDevice>::size;
Index vectorizable_threads =
static_cast<Index>(total_size / PacketSize);
dev.parallel_for_setup(vectorizable_threads, tileSize, range, GRange);
range = total_size;
auto f = [&](cl::sycl::handler &cgh) {
evaluator.bind(cgh);
typedef ExecExprFunctorKernel<Expression, true,
Eigen::TensorEvaluator<Expression, Eigen::SyclDevice>>
conditional_vectorized_kernel;
typedef ExecExprFunctorKernel<Expression, false,
Eigen::TensorEvaluator<Expression, Eigen::SyclDevice>>
non_vectorized_kernel;
// This check makes sure that an expression whose size is smaller than the
// vectorized size never calls the vectorized kernel.
// The non-vectorized kernel is needed because vectorisability is a
// compile-time parameter, whereas the size of a tensor is only known at
// run time.
(vectorizable_threads)
? cgh.parallel_for(
#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS
dev.program().template get_kernel<vectorized_kernel>(),
#endif
cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange),
cl::sycl::range<1>(tileSize)),
conditional_vectorized_kernel(range, vectorizable_threads,
evaluator))
: cgh.parallel_for(
#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS
dev.program().template get_kernel<non_vectorized_kernel>(),
#endif
cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange),
cl::sycl::range<1>(tileSize)),
non_vectorized_kernel(range, vectorizable_threads,
evaluator));
};
cl::sycl::event e;
EIGEN_SYCL_TRY_CATCH(e = dev.sycl_queue().submit(f));
dev.async_synchronize(e);
}
evaluator.cleanup();
} }
}; };
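The rewritten executor evaluates the whole expression tree inside one SYCL kernel: the evaluator is bound to the command group, parallel_for_setup() derives a 1-D nd_range from the flattened tensor size, and every work-item strides over the coefficient space; a second functor consumes whole packets first and finishes the tail with scalar evaluations. A simplified, self-contained sketch of the grid-stride pattern used by the scalar path (the accessor write stands in for evaluator.evalScalar(i), and the global size is assumed to be a multiple of the tile size, which parallel_for_setup is responsible for in Eigen):

    #include <CL/sycl.hpp>
    #include <cstddef>

    void grid_stride_fill(cl::sycl::queue& q, cl::sycl::buffer<float, 1>& out,
                          std::size_t total_coeffs, std::size_t tile_size,
                          std::size_t global_size) {
      q.submit([&](cl::sycl::handler& cgh) {
        auto acc = out.get_access<cl::sycl::access::mode::write>(cgh);
        cgh.parallel_for<class GridStrideFill>(
            cl::sycl::nd_range<1>(cl::sycl::range<1>(global_size),
                                  cl::sycl::range<1>(tile_size)),
            [=](cl::sycl::nd_item<1> item) {
              std::size_t gId = item.get_global_linear_id();
              std::size_t total_threads = item.get_global_range(0);
              // Each work-item handles gId, gId + total_threads, ...
              for (std::size_t i = gId; i < total_coeffs; i += total_threads)
                acc[i] = static_cast<float>(i);   // stands in for evalScalar(i)
            });
      });
      q.wait();
    }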

View File

@ -89,7 +89,10 @@ struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
static const int NumDimensions = XprTraits::NumDimensions; static const int NumDimensions = XprTraits::NumDimensions;
static const int Layout = XprTraits::Layout; static const int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar, typename XprTraits::PointerType>::type PointerType; typedef typename TypeConversion<Scalar,
typename XprTraits::PointerType
>::type
PointerType;
}; };
template<typename UnaryOp, typename XprType> template<typename UnaryOp, typename XprType>
@ -164,9 +167,10 @@ struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
static const int Layout = XprTraits::Layout; static const int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar, typedef typename TypeConversion<Scalar,
typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val, typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
typename traits<LhsXprType>::PointerType, typename traits<LhsXprType>::PointerType,
typename traits<RhsXprType>::PointerType>::type typename traits<RhsXprType>::PointerType>::type
>::type PointerType; >::type
PointerType;
enum { enum {
Flags = 0 Flags = 0
}; };
@ -245,9 +249,10 @@ struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprT
static const int Layout = XprTraits::Layout; static const int Layout = XprTraits::Layout;
typedef typename TypeConversion<Scalar, typedef typename TypeConversion<Scalar,
typename conditional<Pointer_type_promotion<typename Arg2XprType::Scalar, Scalar>::val, typename conditional<Pointer_type_promotion<typename Arg2XprType::Scalar, Scalar>::val,
typename traits<Arg2XprType>::PointerType, typename traits<Arg2XprType>::PointerType,
typename traits<Arg3XprType>::PointerType>::type typename traits<Arg3XprType>::PointerType>::type
>::type PointerType; >::type
PointerType;
enum { enum {
Flags = 0 Flags = 0
}; };
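The traits changes in this file only re-wrap the nested TypeConversion/conditional, but the rule they encode is worth spelling out: the PointerType of a binary or ternary expression is taken from whichever operand the Pointer_type_promotion trait says matches the result scalar. A small illustration of that selection with std::conditional (the trait below is illustrative, not Eigen's exact implementation):

    #include <type_traits>

    template <typename ResultScalar, typename LhsScalar,
              typename LhsPtr, typename RhsPtr>
    struct select_pointer_type {
      typedef typename std::conditional<
          std::is_same<ResultScalar, LhsScalar>::value, LhsPtr, RhsPtr>::type type;
    };

    static_assert(std::is_same<
                      select_pointer_type<float, float, float*, double*>::type,
                      float*>::value,
                  "left operand matches the result scalar, so keep its pointer type");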

View File

@ -131,6 +131,8 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
typedef OutputScalar CoeffReturnType; typedef OutputScalar CoeffReturnType;
typedef typename PacketType<OutputScalar, Device>::type PacketReturnType; typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -167,13 +169,13 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
return m_dimensions; return m_dimensions;
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
if (data) { if (data) {
evalToBuf(data); evalToBuf(data);
return false; return false;
} else { } else {
m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size); m_data = (EvaluatorPointerType)m_device.get((CoeffReturnType*)(m_device.allocate_temp(sizeof(CoeffReturnType) * m_size)));
evalToBuf(m_data); evalToBuf(m_data);
return true; return true;
} }
@ -202,11 +204,16 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_data.bind(cgh);
}
#endif
private: private:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(EvaluatorPointerType data) {
const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value; const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);
@ -576,12 +583,12 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
protected: protected:
Index m_size; Index m_size;
const FFT& m_fft; const FFT EIGEN_DEVICE_REF m_fft;
Dimensions m_dimensions; Dimensions m_dimensions;
array<Index, NumDims> m_strides; array<Index, NumDims> m_strides;
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
CoeffReturnType* m_data; EvaluatorPointerType m_data;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
// This will support a maximum FFT size of 2^32 for each dimension // This will support a maximum FFT size of 2^32 for each dimension
// m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2;

View File

@ -78,9 +78,10 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
}; };
template<typename ArgType, typename Device> template<typename ArgType_, typename Device>
struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
{ {
typedef const typename internal::remove_all<ArgType_>::type ArgType;
typedef TensorForcedEvalOp<ArgType> XprType; typedef TensorForcedEvalOp<ArgType> XprType;
typedef typename ArgType::Scalar Scalar; typedef typename ArgType::Scalar Scalar;
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
@ -88,6 +89,9 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = true, IsAligned = true,
@ -106,8 +110,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
TensorBlockReader; TensorBlockReader;
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
/// op_ is used for sycl : m_impl(op.expression(), device), m_op(op.expression()),
: m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) m_device(device), m_buffer(NULL)
{ } { }
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
@ -115,17 +119,19 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
#if !defined(EIGEN_HIPCC) #if !defined(EIGEN_HIPCC)
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
#endif #endif
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
const Index numValues = internal::array_prod(m_impl.dimensions()); const Index numValues = internal::array_prod(m_impl.dimensions());
m_buffer = (CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)); m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)));
#ifndef EIGEN_USE_SYCL
// Should initialize the memory in case we're dealing with non POD types. // Should initialize the memory in case we're dealing with non POD types.
if (NumTraits<CoeffReturnType>::RequireInitialization) { if (NumTraits<CoeffReturnType>::RequireInitialization) {
for (Index i = 0; i < numValues; ++i) { for (Index i = 0; i < numValues; ++i) {
new(m_buffer+i) CoeffReturnType(); new(m_buffer+i) CoeffReturnType();
} }
} }
#endif
typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo; typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
EvalTo evalToTmp(m_buffer, m_op); EvalTo evalToTmp(m_device.get(m_buffer), m_op);
const bool Vectorize = internal::IsVectorizable<Device, const ArgType>::value; const bool Vectorize = internal::IsVectorizable<Device, const ArgType>::value;
internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, Vectorize>::run(evalToTmp, m_device); internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, Vectorize>::run(evalToTmp, m_device);
return true; return true;
@ -159,17 +165,20 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename Eigen::internal::traits<XprType>::PointerType data() const { return m_buffer; } EvaluatorPointerType data() const { return m_buffer; }
/// required by sycl in order to extract the sycl accessor #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() { return m_impl; } // binding placeholder accessors to a command group handler for SYCL
/// used by sycl in order to build the sycl buffer EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const{return m_device;} m_buffer.bind(cgh);
m_impl.bind(cgh);
}
#endif
private: private:
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
const ArgType m_op; const ArgType m_op;
const Device& m_device; const Device m_device;
CoeffReturnType* m_buffer; EvaluatorPointerType m_buffer;
}; };
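The forced-eval node now allocates its temporary through allocate_temp(), skips the host-side placement-new initialization when building for SYCL (the temporary lives in device memory there), and hands the translated pointer to an EvalTo expression run by the device-specific executor. A host-side toy of the forced-evaluation idea itself, with a plain loop standing in for the EvalTo/executor pair:

    #include <cstddef>
    #include <functional>
    #include <vector>

    // Materialize a sub-expression into a temporary buffer once, then serve
    // all coeff() reads from that buffer. Eigen allocates the buffer through
    // device.allocate_temp()/device.get() instead of std::vector.
    struct ToyForcedEval {
      std::function<float(std::size_t)> expr;  // the sub-expression being forced
      std::vector<float> buffer;               // stands in for the temp device buffer

      void evalSubExprsIfNeeded(std::size_t n) {
        buffer.resize(n);
        for (std::size_t i = 0; i < n; ++i) buffer[i] = expr(i);
      }
      float coeff(std::size_t i) const { return buffer[i]; }
    };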

View File

@ -20,17 +20,20 @@ namespace Eigen {
// map_allocator. // map_allocator.
template<typename T> struct MakePointer { template<typename T> struct MakePointer {
typedef T* Type; typedef T* Type;
typedef T& RefType;
typedef T ScalarType;
}; };
// The PointerType class is a container of the device specefic pointer template <typename T>
// used for referring to a Pointer on TensorEvaluator class. While the TensorExpression EIGEN_STRONG_INLINE T* constCast(const T* data) {
return const_cast<T*>(data);
}
// The StorageMemory class is a container of the device specific pointer
// used for referring to a Pointer on TensorEvaluator class. While the TensorExpression
// is a device-agnostic type and need MakePointer class for type conversion, // is a device-agnostic type and need MakePointer class for type conversion,
// the TensorEvaluator calls can be specialized for a device, hence it is possible // the TensorEvaluator class can be specialized for a device, hence it is possible
// to construct different types of temporary storage memory in TensorEvaluator // to construct different types of temporary storage memory in TensorEvaluator
// for different devices by specializing the following PointerType class. // for different devices by specializing the following StorageMemory class.
template<typename T, typename Device> struct PointerType : MakePointer<T>{}; template<typename T, typename device> struct StorageMemory: MakePointer <T> {};
namespace internal{ namespace internal{
template<typename A, typename B> struct Pointer_type_promotion { template<typename A, typename B> struct Pointer_type_promotion {
@ -39,25 +42,11 @@ template<typename A, typename B> struct Pointer_type_promotion {
template<typename A> struct Pointer_type_promotion<A, A> { template<typename A> struct Pointer_type_promotion<A, A> {
static const bool val = true; static const bool val = true;
}; };
template<typename A, typename B> struct TypeConversion; template<typename A, typename B> struct TypeConversion {
#ifndef __SYCL_DEVICE_ONLY__
template<typename A, typename B> struct TypeConversion{
typedef A* type; typedef A* type;
}; };
#endif
} }
#if defined(EIGEN_USE_SYCL)
namespace TensorSycl {
namespace internal{
template <typename HostExpr, typename FunctorExpr, typename Tuple_of_Acc, typename Dims, typename Op, typename Index> class ReductionFunctor;
template<typename CoeffReturnType ,typename OutAccessor, typename HostExpr, typename FunctorExpr, typename Op, typename Dims, typename Index, typename TupleType>
class FullReductionKernelFunctor;
}
}
#endif
template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap; template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor; template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
@ -113,6 +102,31 @@ struct ThreadPoolDevice;
struct GpuDevice; struct GpuDevice;
struct SyclDevice; struct SyclDevice;
#ifdef EIGEN_USE_SYCL
template <typename T> struct MakeSYCLPointer {
typedef Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T> Type;
};
template <typename T>
EIGEN_STRONG_INLINE const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>&
constCast(const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>& data) {
return data;
}
template <typename T>
struct StorageMemory<T, SyclDevice> : MakeSYCLPointer<T> {};
template <typename T>
struct StorageMemory<T, const SyclDevice> : StorageMemory<T, SyclDevice> {};
namespace TensorSycl {
namespace internal{
template <typename Evaluator, typename Op> class ReductionFunctor;
}
}
#endif
enum FFTResultType { enum FFTResultType {
RealPart = 0, RealPart = 0,
ImagPart = 1, ImagPart = 1,
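The forward declarations now carry both halves of the abstraction: StorageMemory<T, SyclDevice> swaps the raw pointer for a RangeAccess handle, and constCast() strips constness from a raw pointer while passing handle-like types through untouched. A tiny sketch of that overload pair (DeviceHandle is a hypothetical stand-in for the SYCL range accessor):

    // Raw pointers get a const_cast; handle types are returned unchanged.
    template <typename T>
    inline T* constCast(const T* data) { return const_cast<T*>(data); }

    struct DeviceHandle { int id; };   // hypothetical accessor-like handle

    inline const DeviceHandle& constCast(const DeviceHandle& data) { return data; }

    // The same generic evaluator code then works for both pointer families.
    template <typename Ptr>
    auto normalized(const Ptr& p) -> decltype(constCast(p)) { return constCast(p); }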

View File

@ -421,6 +421,7 @@ class GaussianGenerator {
const array<T, NumDims>& std_devs) const array<T, NumDims>& std_devs)
: m_means(means) : m_means(means)
{ {
EIGEN_UNROLL_LOOP
for (size_t i = 0; i < NumDims; ++i) { for (size_t i = 0; i < NumDims; ++i) {
m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2; m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
} }
@ -428,6 +429,7 @@ class GaussianGenerator {
EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const { EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const {
T tmp = T(0); T tmp = T(0);
EIGEN_UNROLL_LOOP
for (size_t i = 0; i < NumDims; ++i) { for (size_t i = 0; i < NumDims; ++i) {
T offset = coordinates[i] - m_means[i]; T offset = coordinates[i] - m_means[i];
tmp += offset * offset / m_two_sigmas[i]; tmp += offset * offset / m_two_sigmas[i];
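EIGEN_UNROLL_LOOP annotates fixed-trip-count loops such as the two above so that device compilers can fully unroll them. One plausible shape for such an unroll-hint macro, shown purely as an assumption about the technique rather than Eigen's actual definition:

    // Assumption: emit an unroll pragma only for compilers known to accept it.
    #if defined(__clang__) || defined(__CUDACC__) || defined(SYCL_DEVICE_ONLY)
      #define TOY_UNROLL_LOOP _Pragma("unroll")
    #else
      #define TOY_UNROLL_LOOP
    #endif

    template <int N>
    float sum_of_squares(const float (&v)[N]) {
      float acc = 0.f;
      TOY_UNROLL_LOOP
      for (int i = 0; i < N; ++i) acc += v[i] * v[i];  // trip count known at compile time
      return acc;
    }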

View File

@ -88,6 +88,8 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1), PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
@ -104,22 +106,21 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
TensorBlock; TensorBlock;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_device(device), m_generator(op.generator()) : m_device(device), m_generator(op.generator())
#ifdef EIGEN_USE_SYCL
, m_argImpl(op.expression(), device)
#endif
{ {
TensorEvaluator<ArgType, Device> argImpl(op.expression(), device); TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
m_dimensions = argImpl.dimensions(); m_dimensions = argImpl.dimensions();
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
m_strides[0] = 1; m_strides[0] = 1;
EIGEN_UNROLL_LOOP
for (int i = 1; i < NumDims; ++i) { for (int i = 1; i < NumDims; ++i) {
m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]); if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
} }
} else { } else {
m_strides[NumDims - 1] = 1; m_strides[NumDims - 1] = 1;
EIGEN_UNROLL_LOOP
for (int i = NumDims - 2; i >= 0; --i) { for (int i = NumDims - 2; i >= 0; --i) {
m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]); if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
@ -129,7 +130,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
return true; return true;
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
@ -234,11 +235,11 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
TensorOpCost::MulCost<Scalar>()); TensorOpCost::MulCost<Scalar>());
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; } // binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Generator& functor() const { return m_generator; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler&) const {}
#endif #endif
protected: protected:
@ -261,14 +262,11 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
} }
} }
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
Dimensions m_dimensions; Dimensions m_dimensions;
array<Index, NumDims> m_strides; array<Index, NumDims> m_strides;
array<IndexDivisor, NumDims> m_fast_strides; array<IndexDivisor, NumDims> m_fast_strides;
Generator m_generator; Generator m_generator;
#ifdef EIGEN_USE_SYCL
TensorEvaluator<ArgType, Device> m_argImpl;
#endif
}; };
} // end namespace Eigen } // end namespace Eigen
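
The bind() methods introduced throughout this patch follow the SYCL 1.2.1 placeholder-accessor pattern: an accessor created without a handler must be registered with handler::require() before the kernel that uses it is submitted. A minimal stand-alone sketch of that pattern, separate from the Eigen code above (the buffer, kernel name, and scaling operation are invented for illustration):

#include <CL/sycl.hpp>

// A placeholder accessor type: it can be constructed without a command group.
using PlaceholderAcc =
    cl::sycl::accessor<float, 1, cl::sycl::access::mode::read_write,
                       cl::sycl::access::target::global_buffer,
                       cl::sycl::access::placeholder::true_t>;

void bind_and_run(cl::sycl::queue& q, cl::sycl::buffer<float, 1>& buf) {
  PlaceholderAcc acc(buf);                 // created outside any command group
  q.submit([&](cl::sycl::handler& cgh) {
    cgh.require(acc);                      // what each evaluator's bind(cgh) boils down to
    cgh.parallel_for<class scale_kernel>(
        cl::sycl::range<1>(buf.get_count()),
        [=](cl::sycl::id<1> i) { acc[i] *= 2.0f; });
  });
}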


@ -154,23 +154,6 @@ class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprT
m_padding_left(padding_left), m_padding_right(padding_right), m_padding_left(padding_left), m_padding_right(padding_right),
m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
#ifdef EIGEN_USE_SYCL // this is work around for sycl as Eigen could not use c++11 deligate constructor feature
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
DenseIndex row_strides, DenseIndex col_strides,
DenseIndex in_row_strides, DenseIndex in_col_strides,
DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
bool padding_explicit, DenseIndex padding_top, DenseIndex padding_bottom,
DenseIndex padding_left, DenseIndex padding_right, PaddingType padding_type,
Scalar padding_value)
: m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
m_row_strides(row_strides), m_col_strides(col_strides),
m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
m_padding_explicit(padding_explicit), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
m_padding_left(padding_left), m_padding_right(padding_right),
m_padding_type(padding_type), m_padding_value(padding_value) {}
#endif
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
DenseIndex patch_rows() const { return m_patch_rows; } DenseIndex patch_rows() const { return m_patch_rows; }
@ -242,6 +225,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -256,15 +241,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
typedef internal::TensorBlock<Scalar, Index, NumDims, Layout> typedef internal::TensorBlock<Scalar, Index, NumDims, Layout>
OutputTensorBlock; OutputTensorBlock;
#ifdef __SYCL_DEVICE_ONLY__ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType op, const Device& device)
#else
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
#endif
: m_device(device), m_impl(op.expression(), device) : m_device(device), m_impl(op.expression(), device)
#ifdef EIGEN_USE_SYCL
, m_op(op)
#endif
{ {
EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
@ -410,7 +388,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -516,13 +494,15 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
return packetWithPossibleZero(index); return packetWithPossibleZero(index);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
// Required by SYCL in order to construct the expression tree on the device // binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& xpr() const { return m_op; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
}
#endif #endif
Index rowPaddingTop() const { return m_rowPaddingTop; } Index rowPaddingTop() const { return m_rowPaddingTop; }
@ -693,6 +673,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
{ {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i); values[i] = coeff(index+i);
} }
@ -744,12 +725,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
Scalar m_paddingValue; Scalar m_paddingValue;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
#ifdef EIGEN_USE_SYCL
// Required for SYCL in order to construct the expression tree on the device
XprType m_op;
#endif
}; };


@ -353,6 +353,7 @@ namespace internal {
template<typename FirstType, typename... OtherTypes> template<typename FirstType, typename... OtherTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index array_prod(const IndexList<FirstType, OtherTypes...>& sizes) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index array_prod(const IndexList<FirstType, OtherTypes...>& sizes) {
Index result = 1; Index result = 1;
EIGEN_UNROLL_LOOP
for (size_t i = 0; i < array_size<IndexList<FirstType, OtherTypes...> >::value; ++i) { for (size_t i = 0; i < array_size<IndexList<FirstType, OtherTypes...> >::value; ++i) {
result *= sizes[i]; result *= sizes[i];
} }
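
EIGEN_UNROLL_LOOP is the unrolling hint this patch places in front of fixed-trip-count loops such as the one above. A minimal sketch of the same usage outside Eigen, assuming the macro expands to an unroll pragma on compilers that support it and to nothing otherwise:

// Hypothetical helper: sum a fixed-size array with an unrolling hint.
template <int N>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float sum_fixed(const float (&v)[N]) {
  float acc = 0.f;
  EIGEN_UNROLL_LOOP  // trip count N is known at compile time
  for (int i = 0; i < N; ++i) {
    acc += v[i];
  }
  return acc;
}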


@ -86,6 +86,8 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false, IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
@ -131,7 +133,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -146,6 +148,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
eigen_assert(index < dimensions().TotalSize()); eigen_assert(index < dimensions().TotalSize());
*inputIndex = 0; *inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStrides[i]; const Index idx = index / m_outputStrides[i];
if (idx != idx / m_fastStrides[i] * m_strides[i]) { if (idx != idx / m_fastStrides[i] * m_strides[i]) {
@ -160,6 +163,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
*inputIndex += index / m_strides[0]; *inputIndex += index / m_strides[0];
return true; return true;
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = index / m_outputStrides[i]; const Index idx = index / m_outputStrides[i];
if (idx != idx / m_fastStrides[i] * m_strides[i]) { if (idx != idx / m_fastStrides[i] * m_strides[i]) {
@ -195,6 +199,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i); values[i] = coeff(index+i);
} }
@ -215,11 +220,13 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
compute_cost, vectorized, PacketSize); compute_cost, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } // binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Strides& functor() const { return m_strides; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
}
#endif #endif
protected: protected:


@ -37,7 +37,7 @@ namespace {
{ {
#ifdef EIGEN_GPU_COMPILE_PHASE #ifdef EIGEN_GPU_COMPILE_PHASE
return __clz(val); return __clz(val);
#elif defined(__SYCL_DEVICE_ONLY__) #elif defined(SYCL_DEVICE_ONLY)
return cl::sycl::clz(val); return cl::sycl::clz(val);
#elif EIGEN_COMP_MSVC #elif EIGEN_COMP_MSVC
unsigned long index; unsigned long index;
@ -55,8 +55,8 @@ namespace {
{ {
#ifdef EIGEN_GPU_COMPILE_PHASE #ifdef EIGEN_GPU_COMPILE_PHASE
return __clzll(val); return __clzll(val);
#elif defined(__SYCL_DEVICE_ONLY__) #elif defined(SYCL_DEVICE_ONLY)
return cl::sycl::clz(val); return static_cast<int>(cl::sycl::clz(val));
#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64 #elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
unsigned long index; unsigned long index;
_BitScanReverse64(&index, val); _BitScanReverse64(&index, val);
@ -92,7 +92,7 @@ namespace {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) {
#if defined(EIGEN_GPU_COMPILE_PHASE) #if defined(EIGEN_GPU_COMPILE_PHASE)
return __umulhi(a, b); return __umulhi(a, b);
#elif defined(__SYCL_DEVICE_ONLY__) #elif defined(SYCL_DEVICE_ONLY)
return cl::sycl::mul_hi(a, static_cast<uint32_t>(b)); return cl::sycl::mul_hi(a, static_cast<uint32_t>(b));
#else #else
return (static_cast<uint64_t>(a) * b) >> 32; return (static_cast<uint64_t>(a) * b) >> 32;
@ -103,7 +103,7 @@ namespace {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
#if defined(EIGEN_GPU_COMPILE_PHASE) #if defined(EIGEN_GPU_COMPILE_PHASE)
return __umul64hi(a, b); return __umul64hi(a, b);
#elif defined(__SYCL_DEVICE_ONLY__) #elif defined(SYCL_DEVICE_ONLY)
return cl::sycl::mul_hi(a, static_cast<uint64_t>(b)); return cl::sycl::mul_hi(a, static_cast<uint64_t>(b));
#elif defined(__SIZEOF_INT128__) #elif defined(__SIZEOF_INT128__)
__uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
@ -124,7 +124,7 @@ namespace {
template <typename T> template <typename T>
struct DividerHelper<64, T> { struct DividerHelper<64, T> {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
#if defined(__SIZEOF_INT128__) && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__) #if defined(__SIZEOF_INT128__) && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
#else #else
const uint64_t shift = 1ULL << log_div; const uint64_t shift = 1ULL << log_div;
@ -205,8 +205,8 @@ class TensorIntDivisor<int32_t, true> {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const {
#ifdef EIGEN_GPU_COMPILE_PHASE #ifdef EIGEN_GPU_COMPILE_PHASE
return (__umulhi(magic, n) >> shift); return (__umulhi(magic, n) >> shift);
#elif defined(__SYCL_DEVICE_ONLY__) #elif defined(SYCL_DEVICE_ONLY)
return (cl::sycl::mul_hi(static_cast<uint64_t>(magic), static_cast<uint64_t>(n)) >> shift); return (cl::sycl::mul_hi(magic, static_cast<uint32_t>(n)) >> shift);
#else #else
uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n); uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n);
return (static_cast<uint32_t>(v >> 32) >> shift); return (static_cast<uint32_t>(v >> 32) >> shift);
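
When neither __umulhi nor cl::sycl::mul_hi is available, the divide falls back to the plain multiply-high sequence shown in the last branch above. A stand-alone sketch of that trick, taking the precomputed magic multiplier and shift as given (they are whatever TensorIntDivisor derived for the divisor):

#include <cstdint>

// Divide n by a fixed divisor using a precomputed (magic, shift) pair:
// 64-bit multiply, keep the high 32 bits, then shift.
inline uint32_t divide_by_magic(uint32_t n, uint32_t magic, int shift) {
  const uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n);
  return static_cast<uint32_t>(v >> 32) >> shift;
}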


@ -134,13 +134,22 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
} }
} }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
}
#endif
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
return m_impl.evalSubExprsIfNeeded(data); return m_impl.evalSubExprsIfNeeded(data);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
@ -162,7 +171,9 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
return m_impl.costPerCoeff(vectorized); return m_impl.costPerCoeff(vectorized);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_impl.data(); } EIGEN_DEVICE_FUNC typename Storage::Type data() const {
return constCast(m_impl.data());
}
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }


@ -59,4 +59,39 @@
#define EIGEN_SLEEP(n) sleep(std::max<unsigned>(1, n/1000)) #define EIGEN_SLEEP(n) sleep(std::max<unsigned>(1, n/1000))
#endif #endif
// Define a macro to use a reference on the host but a value on the device
#if defined(SYCL_DEVICE_ONLY)
#define EIGEN_DEVICE_REF
#else
#define EIGEN_DEVICE_REF &
#endif
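
A hypothetical member declaration showing the effect of EIGEN_DEVICE_REF: the same line yields a reference member on the host build and a by-value copy on the SYCL device build, which is why the evaluators in this patch switch their m_device members to it.

// Sketch only: 'const Device&' on the host, 'const Device' when SYCL_DEVICE_ONLY is defined.
template <typename Device>
struct ExampleEvaluator {
  const Device EIGEN_DEVICE_REF m_device;
  explicit ExampleEvaluator(const Device& d) : m_device(d) {}
};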
// Define a macro for catching SYCL exceptions if exceptions are enabled
#if defined(EIGEN_EXCEPTIONS)
#define EIGEN_SYCL_TRY_CATCH(X) \
do { \
try { X; } \
catch(const cl::sycl::exception& e) { \
std::cerr << "SYCL exception at " \
<< __FILE__ << ":" << __LINE__ << std::endl \
<< e.what() << std::endl; \
std::rethrow_exception(std::current_exception()); \
} \
} while (false)
#else
#define EIGEN_SYCL_TRY_CATCH(X) X
#endif
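
A hypothetical call site for the macro: with exceptions enabled the wrapped statement runs inside the try block, a cl::sycl::exception is reported with file and line and then rethrown; without exceptions the statement is executed as-is.

// Sketch only: wait on a SYCL queue and surface any asynchronous errors.
void wait_checked(cl::sycl::queue& queue) {
  EIGEN_SYCL_TRY_CATCH(queue.wait_and_throw());
}
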
// Define a macro if local memory flags are unset or one of them is set
// Setting both flags is the same as unsetting them
#if (!defined(EIGEN_SYCL_LOCAL_MEM) && !defined(EIGEN_SYCL_NO_LOCAL_MEM)) || \
(defined(EIGEN_SYCL_LOCAL_MEM) && defined(EIGEN_SYCL_NO_LOCAL_MEM))
#define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON 1
#define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_OFF 1
#elif defined(EIGEN_SYCL_LOCAL_MEM) && !defined(EIGEN_SYCL_NO_LOCAL_MEM)
#define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON 1
#elif !defined(EIGEN_SYCL_LOCAL_MEM) && defined(EIGEN_SYCL_NO_LOCAL_MEM)
#define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_OFF 1
#endif
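
A hypothetical way these derived flags could be consumed when choosing between a local-memory and a global-memory kernel; when the user sets neither (or both) of the original options, both variants remain available.

// Sketch only: turn the preprocessor flags into compile-time booleans.
#if defined(EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON)
constexpr bool local_mem_kernel_allowed = true;
#else
constexpr bool local_mem_kernel_allowed = false;
#endif
#if defined(EIGEN_SYCL_LOCAL_MEM_UNSET_OR_OFF)
constexpr bool global_mem_kernel_allowed = true;
#else
constexpr bool global_mem_kernel_allowed = false;
#endif
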
#endif #endif


@ -31,8 +31,12 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
public: public:
typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self; typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self;
typedef typename PlainObjectType::Base Base; typedef typename PlainObjectType::Base Base;
typedef typename Eigen::internal::nested<Self>::type Nested; #ifdef EIGEN_USE_SYCL
typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind; typedef typename Eigen::internal::remove_reference<typename Eigen::internal::nested<Self>::type>::type Nested;
#else
typedef typename Eigen::internal::nested<Self>::type Nested;
#endif
typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
typedef typename internal::traits<PlainObjectType>::Index Index; typedef typename internal::traits<PlainObjectType>::Index Index;
typedef typename internal::traits<PlainObjectType>::Scalar Scalar; typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;


@ -85,9 +85,57 @@ struct PacketType<half, GpuDevice> {
#endif #endif
#if defined(EIGEN_USE_SYCL) #if defined(EIGEN_USE_SYCL)
template <typename T>
struct PacketType<T, SyclDevice> { namespace TensorSycl {
typedef T type; namespace internal {
template <typename Index, Index A, Index B> struct PlusOp {
static constexpr Index Value = A + B;
};
template <typename Index, Index A, Index B> struct DivOp {
static constexpr Index Value = A / B;
};
template <typename Index, Index start, Index end, Index step,
template <class Indx, Indx...> class StepOp>
struct static_for {
template <typename UnaryOperator>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void loop(UnaryOperator op) {
op(start);
static_for<Index, StepOp<Index, start, step>::Value, end, step,
StepOp>::loop(op);
}
};
template <typename Index, Index end, Index step,
template <class Indx, Indx...> class StepOp>
struct static_for<Index, end, end, step, StepOp> {
template <typename UnaryOperator>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void loop(UnaryOperator) {}
};
template <typename OutScalar, typename Device, bool Vectorizable>
struct Vectorise {
static const int PacketSize = 1;
typedef OutScalar PacketReturnType;
};
template <typename OutScalar, typename Device>
struct Vectorise<OutScalar, Device, true> {
static const int PacketSize = Eigen::PacketType<OutScalar, Device>::size;
typedef typename Eigen::PacketType<OutScalar, Device>::type PacketReturnType;
};
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index roundUp(Index x, Index y) {
return ((((x) + (y)-1) / (y)) * (y));
}
} // namespace internal
} // namespace TensorSycl
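
A small, hypothetical use of these helpers, assuming they live in Eigen::TensorSycl::internal as the hunk suggests: static_for unrolls a compile-time index loop by stepping the start index through PlusOp until it reaches the end, and roundUp pads a size up to the next multiple (for example, roundUp(50, 16) == 64).

// Sketch only: sum four elements with a compile-time unrolled loop over indices 0..3.
inline float static_for_example() {
  float data[4] = {1.f, 2.f, 3.f, 4.f};
  float sum = 0.f;
  Eigen::TensorSycl::internal::static_for<int, 0, 4, 1,
      Eigen::TensorSycl::internal::PlusOp>::loop([&](int i) { sum += data[i]; });
  return sum;  // 10.f
}
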
template <>
struct PacketType<half, SyclDevice> {
typedef half type;
static const int size = 1; static const int size = 1;
enum { enum {
HasAdd = 0, HasAdd = 0,
@ -104,8 +152,58 @@ template <typename T>
HasBlend = 0 HasBlend = 0
}; };
}; };
#endif template <typename Scalar>
struct PacketType<Scalar, SyclDevice> : internal::default_packet_traits {
typedef Scalar type;
typedef Scalar half;
enum {
Vectorizable = 0,
size = 1,
AlignedOnScalar = 0,
HasHalfPacket = 0
};
enum {
HasAdd = 0,
HasSub = 0,
HasMul = 0,
HasNegate = 0,
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasConj = 0,
HasSetLinear = 0
};
};
template <typename Scalar>
struct PacketType<Scalar, const SyclDevice> : PacketType<Scalar, SyclDevice>{};
#ifndef EIGEN_DONT_VECTORIZE_SYCL
#define PACKET_TYPE(CVQual, Type, val, lengths, DEV)\
template<> struct PacketType<CVQual Type, DEV> : internal::sycl_packet_traits<val, lengths> \
{\
typedef typename internal::packet_traits<Type>::type type;\
typedef typename internal::packet_traits<Type>::half half;\
};
PACKET_TYPE(const, float, 1, 4, SyclDevice)
PACKET_TYPE(, float, 1, 4, SyclDevice)
PACKET_TYPE(const, float, 1, 4, const SyclDevice)
PACKET_TYPE(, float, 1, 4, const SyclDevice)
PACKET_TYPE(const, double, 0, 2, SyclDevice)
PACKET_TYPE(, double, 0, 2, SyclDevice)
PACKET_TYPE(const, double, 0, 2, const SyclDevice)
PACKET_TYPE(, double, 0, 2, const SyclDevice)
#undef PACKET_TYPE
template<> struct PacketType<half, const SyclDevice>: PacketType<half, SyclDevice>{};
template<> struct PacketType<const half, const SyclDevice>: PacketType<half, SyclDevice>{};
#endif
#endif
// Tuple mimics std::pair but works on e.g. nvcc. // Tuple mimics std::pair but works on e.g. nvcc.
template <typename U, typename V> struct Tuple { template <typename U, typename V> struct Tuple {
@ -124,7 +222,7 @@ template <typename U, typename V> struct Tuple {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Tuple& operator= (const Tuple& rhs) { Tuple& operator= (const Tuple& rhs) {
#ifndef __SYCL_DEVICE_ONLY__ #ifndef SYCL_DEVICE_ONLY
if (&rhs == this) return *this; if (&rhs == this) return *this;
#endif #endif
first = rhs.first; first = rhs.first;


@ -37,7 +37,7 @@ struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprTyp
template<typename NewDimensions, typename XprType> template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense> struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{ {
typedef const TensorReshapingOp<NewDimensions, XprType>& type; typedef const TensorReshapingOp<NewDimensions, XprType>EIGEN_DEVICE_REF type;
}; };
template<typename NewDimensions, typename XprType> template<typename NewDimensions, typename XprType>
@ -106,6 +106,9 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
static const int NumOutputDims = internal::array_size<Dimensions>::value; static const int NumOutputDims = internal::array_size<Dimensions>::value;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
@ -168,7 +171,7 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
return m_impl.evalSubExprsIfNeeded(data); return m_impl.evalSubExprsIfNeeded(data);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
@ -326,10 +329,18 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
} }
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return const_cast<Scalar*>(m_impl.data()); } EIGEN_DEVICE_FUNC typename Storage::Type data() const {
return constCast(m_impl.data());
}
EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
}
#endif
protected: protected:
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
NewDimensions m_dimensions; NewDimensions m_dimensions;
@ -404,7 +415,7 @@ struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<Xp
template<typename StartIndices, typename Sizes, typename XprType> template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense> struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{ {
typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type; typedef const TensorSlicingOp<StartIndices, Sizes, XprType>EIGEN_DEVICE_REF type;
}; };
template<typename StartIndices, typename Sizes, typename XprType> template<typename StartIndices, typename Sizes, typename XprType>
@ -488,7 +499,7 @@ template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
// It is very expensive to start the memcpy kernel on GPU: we therefore only // It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies. // use it for large copies.
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
template <typename Index> struct MemcpyTriggerForSlicing<Index, const Eigen::SyclDevice> { template <typename Index> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice> {
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { } EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; } EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
}; };
@ -508,6 +519,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef Sizes Dimensions; typedef Sizes Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
// Alignment can't be guaranteed at compile time since it depends on the // Alignment can't be guaranteed at compile time since it depends on the
@ -575,7 +589,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
&& data && m_impl.data() && data && m_impl.data()
@ -599,10 +613,10 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
// Use memcpy if it's going to be faster than using the regular evaluation. // Use memcpy if it's going to be faster than using the regular evaluation.
const MemcpyTriggerForSlicing<Index, Device> trigger(m_device); const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
if (trigger(contiguous_values)) { if (trigger(contiguous_values)) {
Scalar* src = (Scalar*)m_impl.data(); EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
Index offset = srcCoeff(i); Index offset = srcCoeff(i);
m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar)); m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar));
} }
return false; return false;
} }
@ -637,6 +651,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
Index inputIndices[] = {0, 0}; Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1}; Index indices[] = {index, index + packetSize - 1};
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index idx0 = indices[0] / m_fastOutputStrides[i]; const Index idx0 = indices[0] / m_fastOutputStrides[i];
const Index idx1 = indices[1] / m_fastOutputStrides[i]; const Index idx1 = indices[1] / m_fastOutputStrides[i];
@ -648,6 +663,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
inputIndices[0] += (indices[0] + m_offsets[0]); inputIndices[0] += (indices[0] + m_offsets[0]);
inputIndices[1] += (indices[1] + m_offsets[0]); inputIndices[1] += (indices[1] + m_offsets[0]);
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index idx0 = indices[0] / m_fastOutputStrides[i]; const Index idx0 = indices[0] / m_fastOutputStrides[i];
const Index idx1 = indices[1] / m_fastOutputStrides[i]; const Index idx1 = indices[1] / m_fastOutputStrides[i];
@ -667,6 +683,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize]; EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
values[0] = m_impl.coeff(inputIndices[0]); values[0] = m_impl.coeff(inputIndices[0]);
values[packetSize-1] = m_impl.coeff(inputIndices[1]); values[packetSize-1] = m_impl.coeff(inputIndices[1]);
EIGEN_UNROLL_LOOP
for (int i = 1; i < packetSize-1; ++i) { for (int i = 1; i < packetSize-1; ++i) {
values[i] = coeff(index+i); values[i] = coeff(index+i);
} }
@ -698,8 +715,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
m_impl.block(&input_block); m_impl.block(&input_block);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
Scalar* result = const_cast<Scalar*>(m_impl.data()); typename Storage::Type result = constCast(m_impl.data());
if (result) { if (result) {
Index offset = 0; Index offset = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@ -733,19 +750,19 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
} }
return NULL; return NULL;
} }
/// used by sycl #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{ // binding placeholder accessors to a command group handler for SYCL
return m_impl; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
} m_impl.bind(cgh);
/// used by sycl
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& startIndices() const{
return m_offsets;
} }
#endif
protected: protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
{ {
Index inputIndex = 0; Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_fastOutputStrides[i]; const Index idx = index / m_fastOutputStrides[i];
inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
@ -753,6 +770,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
} }
inputIndex += (index + m_offsets[0]); inputIndex += (index + m_offsets[0]);
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = index / m_fastOutputStrides[i]; const Index idx = index / m_fastOutputStrides[i];
inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
@ -767,7 +785,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
array<Index, NumDims> m_inputStrides; array<Index, NumDims> m_inputStrides;
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
Dimensions m_dimensions; Dimensions m_dimensions;
bool m_is_identity; bool m_is_identity;
const StartIndices m_offsets; const StartIndices m_offsets;
@ -829,6 +847,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
Index inputIndices[] = {0, 0}; Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1}; Index indices[] = {index, index + packetSize - 1};
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
@ -840,6 +859,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
inputIndices[0] += (indices[0] + this->m_offsets[0]); inputIndices[0] += (indices[0] + this->m_offsets[0]);
inputIndices[1] += (indices[1] + this->m_offsets[0]); inputIndices[1] += (indices[1] + this->m_offsets[0]);
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
@ -859,6 +879,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
internal::pstore<CoeffReturnType, PacketReturnType>(values, x); internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
this->m_impl.coeffRef(inputIndices[0]) = values[0]; this->m_impl.coeffRef(inputIndices[0]) = values[0];
this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
EIGEN_UNROLL_LOOP
for (int i = 1; i < packetSize-1; ++i) { for (int i = 1; i < packetSize-1; ++i) {
this->coeffRef(index+i) = values[i]; this->coeffRef(index+i) = values[i];
} }
@ -892,7 +913,7 @@ struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprTyp
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense> struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{ {
typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type; typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>EIGEN_DEVICE_REF type;
}; };
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
@ -969,6 +990,8 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
typedef Strides Dimensions; typedef Strides Dimensions;
enum { enum {
@ -985,8 +1008,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), : m_impl(op.expression(), device),
m_device(device), m_device(device),
m_strides(op.strides()), m_exprStartIndices(op.startIndices()), m_strides(op.strides())
m_exprStopIndices(op.stopIndices())
{ {
// Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped; DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
@ -1069,7 +1091,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -1091,30 +1113,28 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims); return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
return NULL; return NULL;
} }
#ifdef EIGEN_USE_SYCL
//use by sycl // binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStartIndices() const { return m_exprStartIndices; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
//use by sycl m_impl.bind(cgh);
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStopIndices() const { return m_exprStopIndices; } }
//use by sycl #endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& strides() const { return m_strides; }
/// used by sycl
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{return m_impl;}
protected: protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
{ {
Index inputIndex = 0; Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i >= 0; --i) { for (int i = NumDims - 1; i >= 0; --i) {
const Index idx = index / m_fastOutputStrides[i]; const Index idx = index / m_fastOutputStrides[i];
inputIndex += idx * m_inputStrides[i] + m_offsets[i]; inputIndex += idx * m_inputStrides[i] + m_offsets[i];
index -= idx * m_outputStrides[i]; index -= idx * m_outputStrides[i];
} }
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims; ++i) { for (int i = 0; i < NumDims; ++i) {
const Index idx = index / m_fastOutputStrides[i]; const Index idx = index / m_fastOutputStrides[i];
inputIndex += idx * m_inputStrides[i] + m_offsets[i]; inputIndex += idx * m_inputStrides[i] + m_offsets[i];
@ -1125,7 +1145,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
} }
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
#ifndef __SYCL_DEVICE_ONLY__ #ifndef SYCL_DEVICE_ONLY
return numext::maxi(min, numext::mini(max,value)); return numext::maxi(min, numext::mini(max,value));
#else #else
return cl::sycl::clamp(value, min, max); return cl::sycl::clamp(value, min, max);
@ -1137,15 +1157,11 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
array<Index, NumDims> m_inputStrides; array<Index, NumDims> m_inputStrides;
bool m_is_identity; bool m_is_identity;
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
DSizes<Index, NumDims> m_startIndices; // clamped startIndices DSizes<Index, NumDims> m_startIndices; // clamped startIndices
DSizes<Index, NumDims> m_dimensions; DSizes<Index, NumDims> m_dimensions;
DSizes<Index, NumDims> m_offsets; // offset in a flattened shape DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
const Strides m_strides; const Strides m_strides;
//use by sycl
const StartIndices m_exprStartIndices;
//use by sycl
const StopIndices m_exprStopIndices;
}; };
// Eval as lvalue // Eval as lvalue


@ -92,6 +92,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = true, IsAligned = true,
@ -138,7 +140,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -151,6 +153,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
eigen_assert(index < dimensions().TotalSize()); eigen_assert(index < dimensions().TotalSize());
Index inputIndex = 0; Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStrides[i]; const Index idx = index / m_outputStrides[i];
if (isPaddingAtIndexForDim(idx, i)) { if (isPaddingAtIndexForDim(idx, i)) {
@ -164,6 +167,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
} }
inputIndex += (index - m_padding[0].first); inputIndex += (index - m_padding[0].first);
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = index / m_outputStrides[i+1]; const Index idx = index / m_outputStrides[i+1];
if (isPaddingAtIndexForDim(idx, i)) { if (isPaddingAtIndexForDim(idx, i)) {
@ -192,23 +196,25 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
TensorOpCost cost = m_impl.costPerCoeff(vectorized); TensorOpCost cost = m_impl.costPerCoeff(vectorized);
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims; ++i) for (int i = 0; i < NumDims; ++i)
updateCostPerDimension(cost, i, i == 0); updateCostPerDimension(cost, i, i == 0);
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i >= 0; --i) for (int i = NumDims - 1; i >= 0; --i)
updateCostPerDimension(cost, i, i == NumDims - 1); updateCostPerDimension(cost, i, i == NumDims - 1);
} }
return cost; return cost;
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
/// used by sycl #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PaddingDimensions& padding() const { return m_padding; } // binding placeholder accessors to a command group handler for SYCL
/// used by sycl EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& padding_value() const { return m_paddingValue; } m_impl.bind(cgh);
/// used by sycl }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{return m_impl;} #endif
private: private:
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
@ -272,6 +278,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
const Index initialIndex = index; const Index initialIndex = index;
Index inputIndex = 0; Index inputIndex = 0;
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index firstIdx = index; const Index firstIdx = index;
const Index lastIdx = index + PacketSize - 1; const Index lastIdx = index + PacketSize - 1;
@ -329,7 +336,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
const Index initialIndex = index; const Index initialIndex = index;
Index inputIndex = 0; Index inputIndex = 0;
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index firstIdx = index; const Index firstIdx = index;
const Index lastIdx = index + PacketSize - 1; const Index lastIdx = index + PacketSize - 1;
@ -383,6 +390,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
{ {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i); values[i] = coeff(index+i);
} }


@ -89,6 +89,8 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
@ -103,9 +105,6 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device) : m_impl(op.expression(), device)
#ifdef EIGEN_USE_SYCL
, m_patch_dims(op.patch_dims())
#endif
{ {
Index num_patches = 1; Index num_patches = 1;
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
@ -149,7 +148,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -167,6 +166,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index]; Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index];
Index inputIndex = 0; Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 2; i > 0; --i) { for (int i = NumDims - 2; i > 0; --i) {
const Index patchIdx = patchIndex / m_patchStrides[i]; const Index patchIdx = patchIndex / m_patchStrides[i];
patchIndex -= patchIdx * m_patchStrides[i]; patchIndex -= patchIdx * m_patchStrides[i];
@ -175,6 +175,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
} }
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 2; ++i) { for (int i = 0; i < NumDims - 2; ++i) {
const Index patchIdx = patchIndex / m_patchStrides[i]; const Index patchIdx = patchIndex / m_patchStrides[i];
patchIndex -= patchIdx * m_patchStrides[i]; patchIndex -= patchIdx * m_patchStrides[i];
@ -202,6 +203,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
Index inputIndices[2] = {0, 0}; Index inputIndices[2] = {0, 0};
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 2; i > 0; --i) { for (int i = NumDims - 2; i > 0; --i) {
const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
patchIndices[1] / m_patchStrides[i]}; patchIndices[1] / m_patchStrides[i]};
@ -217,6 +219,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i];
} }
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 2; ++i) { for (int i = 0; i < NumDims - 2; ++i) {
const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
patchIndices[1] / m_patchStrides[i]}; patchIndices[1] / m_patchStrides[i]};
@ -243,6 +246,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
values[0] = m_impl.coeff(inputIndices[0]); values[0] = m_impl.coeff(inputIndices[0]);
values[PacketSize-1] = m_impl.coeff(inputIndices[1]); values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
EIGEN_UNROLL_LOOP
for (int i = 1; i < PacketSize-1; ++i) { for (int i = 1; i < PacketSize-1; ++i) {
values[i] = coeff(index+i); values[i] = coeff(index+i);
} }
@ -259,11 +263,13 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } // binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PatchDim& functor() const { return m_patch_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
}
#endif #endif
protected: protected:
@ -274,9 +280,6 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
#ifdef EIGEN_USE_SYCL
const PatchDim m_patch_dims;
#endif
}; };
} // end namespace Eigen } // end namespace Eigen


@ -2,6 +2,7 @@
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
// Copyright (C) 2018 Mehdi Goli <eigen@codeplay.com> Codeplay Software Ltd.
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -44,6 +45,7 @@ EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
uint64_t rnd = ::random() ^ mach_absolute_time(); uint64_t rnd = ::random() ^ mach_absolute_time();
return rnd; return rnd;
#else #else
// Augment the current time with pseudo random number generation // Augment the current time with pseudo random number generation
// to ensure that we get different seeds if we try to generate seeds // to ensure that we get different seeds if we try to generate seeds
@ -147,14 +149,41 @@ template <typename T> class UniformRandomGenerator {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
uint64_t seed = 0) { uint64_t seed = 0) {
m_state = PCG_XSH_RS_state(seed); m_state = PCG_XSH_RS_state(seed);
#ifdef EIGEN_USE_SYCL
// In SYCL it is not possible to build the PCG_XSH_RS_state in one step,
// so we need two steps to initialize m_state.
// In SYCL the constructor of the functor is called on the CPU, where the
// clock seed is obtained. That seed is the same for every thread because,
// unlike CUDA, the thread id, block id, etc. are not global functions; they
// are only available inside operator() (which is called on the GPU).
// For CUDA, (((CLOCK + global_thread_id) * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread,
// whereas for SYCL ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to each thread, and each thread
// adds (global_thread_id * 6364136223846793005ULL) for itself exactly once to complete the construction,
// mirroring CUDA. The thread id injection is therefore not available at this stage.
// However, when operator() is called the thread id is available, so inside operator()
// we add the thread id / block id (which is equivalent to i) to the seed and
// construct a unique m_state per thread, as CUDA does.
m_exec_once = false;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
const UniformRandomGenerator& other) { const UniformRandomGenerator& other) {
m_state = other.m_state; m_state = other.m_state;
#ifdef EIGEN_USE_SYCL
m_exec_once = other.m_exec_once;
#endif
} }
template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
T operator()(Index i) const { T operator()(Index i) const {
#ifdef EIGEN_USE_SYCL
if (!m_exec_once) {
// This is the second stage: add the thread id to the CPU clock seed to build a unique seed per thread.
// The (i * 6364136223846793005ULL) term is the remaining part of the PCG_XSH_RS_state, computed on the GPU side.
m_state += (i * 6364136223846793005ULL);
m_exec_once = true;
}
#endif
T result = RandomToTypeUniform<T>(&m_state, i); T result = RandomToTypeUniform<T>(&m_state, i);
return result; return result;
} }
@ -163,6 +192,14 @@ template <typename T> class UniformRandomGenerator {
Packet packetOp(Index i) const { Packet packetOp(Index i) const {
const int packetSize = internal::unpacket_traits<Packet>::size; const int packetSize = internal::unpacket_traits<Packet>::size;
EIGEN_ALIGN_MAX T values[packetSize]; EIGEN_ALIGN_MAX T values[packetSize];
#ifdef EIGEN_USE_SYCL
if (!m_exec_once) {
// This is the second stage: add the thread id to the CPU clock seed to build a unique seed per thread.
m_state += (i * 6364136223846793005ULL);
m_exec_once = true;
}
#endif
EIGEN_UNROLL_LOOP
for (int j = 0; j < packetSize; ++j) { for (int j = 0; j < packetSize; ++j) {
values[j] = RandomToTypeUniform<T>(&m_state, i); values[j] = RandomToTypeUniform<T>(&m_state, i);
} }
@ -171,6 +208,9 @@ template <typename T> class UniformRandomGenerator {
private: private:
mutable uint64_t m_state; mutable uint64_t m_state;
#ifdef EIGEN_USE_SYCL
mutable bool m_exec_once;
#endif
}; };
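To make the two-stage seeding described in the comments above concrete, here is a minimal standalone sketch (not the Eigen implementation); get_clock_seed() and the per-thread index i are stand-ins for the host clock seed and the device-side thread id.

#include <cstdint>

// Placeholder for the host-side clock seed derived in the functor constructor.
std::uint64_t get_clock_seed() { return 123456789ULL; }

struct TwoStageStateSketch {
  std::uint64_t state;
  bool injected;

  // Stage 1 (host): fold the clock seed into the state; identical for all threads.
  TwoStageStateSketch()
      : state(get_clock_seed() * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL),
        injected(false) {}

  // Stage 2 (device): each thread adds its own contribution exactly once,
  // recovering the per-thread state that CUDA builds in a single step.
  std::uint64_t per_thread_state(std::uint64_t i) {
    if (!injected) {
      state += i * 6364136223846793005ULL;
      injected = true;
    }
    return state;
  }
};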
template <typename Scalar> template <typename Scalar>
@ -222,14 +262,37 @@ template <typename T> class NormalRandomGenerator {
// Uses the given "seed" if non-zero, otherwise uses a random seed. // Uses the given "seed" if non-zero, otherwise uses a random seed.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
m_state = PCG_XSH_RS_state(seed); m_state = PCG_XSH_RS_state(seed);
#ifdef EIGEN_USE_SYCL
// In SYCL it is not possible to build the PCG_XSH_RS_state in one step,
// so we need two steps to initialize m_state.
// In SYCL the constructor of the functor is called on the CPU, where the
// clock seed is obtained. That seed is the same for every thread because,
// unlike CUDA, the thread id, block id, etc. are not global functions; they
// are only available inside operator() (which is called on the GPU).
// The thread id injection is therefore not available at this stage. However,
// when operator() is called the thread id is available, so inside operator()
// we add the thread id / block id (which is equivalent to i) to the seed and
// construct a unique m_state per thread, as CUDA does.
m_exec_once = false;
#endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(
const NormalRandomGenerator& other) { const NormalRandomGenerator& other) {
m_state = other.m_state; m_state = other.m_state;
#ifdef EIGEN_USE_SYCL
m_exec_once = other.m_exec_once;
#endif
} }
template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
T operator()(Index i) const { T operator()(Index i) const {
#ifdef EIGEN_USE_SYCL
if (!m_exec_once) {
// This is the second stage: add the thread id to the CPU clock seed to build a unique seed per thread.
m_state += (i * 6364136223846793005ULL);
m_exec_once = true;
}
#endif
T result = RandomToTypeNormal<T>(&m_state, i); T result = RandomToTypeNormal<T>(&m_state, i);
return result; return result;
} }
@ -238,6 +301,14 @@ template <typename T> class NormalRandomGenerator {
Packet packetOp(Index i) const { Packet packetOp(Index i) const {
const int packetSize = internal::unpacket_traits<Packet>::size; const int packetSize = internal::unpacket_traits<Packet>::size;
EIGEN_ALIGN_MAX T values[packetSize]; EIGEN_ALIGN_MAX T values[packetSize];
#ifdef EIGEN_USE_SYCL
if (!m_exec_once) {
// This is the second stage: add the thread id to the CPU clock seed to build a unique seed per thread.
m_state += (i * 6364136223846793005ULL);
m_exec_once = true;
}
#endif
EIGEN_UNROLL_LOOP
for (int j = 0; j < packetSize; ++j) { for (int j = 0; j < packetSize; ++j) {
values[j] = RandomToTypeNormal<T>(&m_state, i); values[j] = RandomToTypeNormal<T>(&m_state, i);
} }
@ -246,6 +317,9 @@ template <typename T> class NormalRandomGenerator {
private: private:
mutable uint64_t m_state; mutable uint64_t m_state;
#ifdef EIGEN_USE_SYCL
mutable bool m_exec_once;
#endif
}; };

View File

@ -299,7 +299,7 @@ template <typename Self, typename Op, typename Device, bool Vectorizable = (Self
struct FullReducer { struct FullReducer {
static const bool HasOptimizedImplementation = false; static const bool HasOptimizedImplementation = false;
static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) { static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::EvaluatorPointerType output) {
const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions()); const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions());
*output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer); *output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer);
} }
@ -400,6 +400,18 @@ struct OuterReducer {
} }
}; };
#ifdef EIGEN_USE_SYCL
// Default Generic reducer
template <typename Self, typename Op, typename Device>
struct GenericReducer {
static const bool HasOptimizedImplementation = false;
EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
eigen_assert(false && "Not implemented");
return true;
}
};
#endif
#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) #if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
template <int B, int N, typename S, typename R, typename I_> template <int B, int N, typename S, typename R, typename I_>
@ -423,6 +435,23 @@ template <int NPT, typename S, typename R, typename I_>
__global__ void OuterReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*); __global__ void OuterReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*);
#endif #endif
/**
* For SYCL, the return type of the reduction is deduced from the initialize method of the given Op.
* This allows the reduction to have a different type for the accumulator than the input data type.
* If this is the case, the functor needs to have two reduce methods: one for reducing an element of the input
* with the accumulator and the other for reducing two accumulators.
* Such a reducer can be useful for instance when the accumulator is a boolean or a bitset that checks for
* some properties of the input.
*/
template <typename Op, typename CoeffReturnType>
struct ReductionReturnType {
#if EIGEN_HAS_CXX11 && defined(EIGEN_USE_SYCL)
typedef typename remove_const<decltype(std::declval<Op>().initialize())>::type type;
#else
typedef typename remove_const<CoeffReturnType>::type type;
#endif
};
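As an illustration of the kind of reducer this enables, here is a sketch (not a functor shipped with Eigen) whose accumulator is a bool checking that every input coefficient is positive: initialize() returns bool, so ReductionReturnType deduces bool, and the two reduce overloads cover the input-with-accumulator and accumulator-with-accumulator cases described above.

// Sketch only: signatures follow the pattern described in the comment above.
struct AllPositiveReducerSketch {
  // The deduced accumulator / return type of the reduction on SYCL.
  bool initialize() const { return true; }
  // Reduce one input element into the accumulator.
  void reduce(const float t, bool* accum) const { *accum = *accum && (t > 0.0f); }
  // Reduce two partial accumulators together.
  void reduce(const bool t, bool* accum) const { *accum = *accum && t; }
  bool finalize(const bool accum) const { return accum; }
};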
template <typename Self, typename Op, template <typename Self, typename Op,
bool Vectorizable = bool Vectorizable =
(Self::InputPacketAccess & Self::ReducerTraits::PacketAccess)> (Self::InputPacketAccess & Self::ReducerTraits::PacketAccess)>
@ -520,12 +549,15 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType,
const Op m_reducer; const Op m_reducer;
}; };
template<typename ArgType, typename Device>
struct TensorReductionEvaluatorBase;
// Eval as rvalue // Eval as rvalue
template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device> template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
{ {
typedef internal::reducer_traits<Op, Device> ReducerTraits; typedef internal::reducer_traits<Op, Device> ReducerTraits;
typedef Dims ReducedDims;
typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType; typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType;
typedef typename XprType::Index Index; typedef typename XprType::Index Index;
typedef ArgType ChildType; typedef ArgType ChildType;
@ -535,12 +567,20 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
static const int NumOutputDims = NumInputDims - NumReducedDims; static const int NumOutputDims = NumInputDims - NumReducedDims;
typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions; typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions;
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self; typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self;
static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess; static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename internal::ReductionReturnType<Op, typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const Index PacketSize = PacketType<CoeffReturnType, Device>::size; static const Index PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
// Subset of strides of the input tensor for the non-reduced dimensions.
// Indexed by output dimensions.
static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
enum { enum {
IsAligned = false, IsAligned = false,
PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess, PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess,
@ -562,11 +602,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value; static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
static const bool RunningFullReduction = (NumOutputDims==0); static const bool RunningFullReduction = (NumOutputDims==0);
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device) : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
#if defined(EIGEN_USE_SYCL)
, m_xpr_dims(op.dims())
#endif
{ {
EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
@ -653,7 +690,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
// of which will eventually result in an NVCC error // of which will eventually result in an NVCC error
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
#endif #endif
bool evalSubExprsIfNeeded(typename MakePointer_<CoeffReturnType>::Type data) { bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
// Use the FullReducer if possible. // Use the FullReducer if possible.
@ -663,7 +700,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
!RunningOnGPU))) { !RunningOnGPU))) {
bool need_assign = false; bool need_assign = false;
if (!data) { if (!data) {
m_result = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType))); m_result = static_cast<EvaluatorPointerType>(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType))));
data = m_result; data = m_result;
need_assign = true; need_assign = true;
} }
@ -671,20 +708,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data); internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data);
return need_assign; return need_assign;
} }
else if(RunningOnSycl){
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
if (!data) {
data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
m_result = data;
}
Op reducer(m_reducer);
internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
return (m_result != NULL);
}
// Attempt to use an optimized reduction. // Attempt to use an optimized reduction.
else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) { else if ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || (RunningOnSycl)) {
bool reducing_inner_dims = true; bool reducing_inner_dims = true;
for (int i = 0; i < NumReducedDims; ++i) { for (int i = 0; i < NumReducedDims; ++i) {
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@ -698,8 +724,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
if (!data) { if (!data) {
if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { if ((num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) || (RunningOnSycl)) {
data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); data = static_cast<EvaluatorPointerType>(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)));
m_result = data; m_result = data;
} }
else { else {
@ -707,6 +733,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
} }
} }
Op reducer(m_reducer); Op reducer(m_reducer);
// For SYCL, this run() call always returns false
if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
if (m_result) { if (m_result) {
m_device.deallocate_temp(m_result); m_device.deallocate_temp(m_result);
@ -731,8 +758,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
if (!data) { if (!data) {
if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { if ((num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) || (RunningOnSycl)) {
data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); data = static_cast<EvaluatorPointerType>(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)));
m_result = data; m_result = data;
} }
else { else {
@ -740,6 +767,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
} }
} }
Op reducer(m_reducer); Op reducer(m_reducer);
// For SYCL, this run() call always returns false
if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
if (m_result) { if (m_result) {
m_device.deallocate_temp(m_result); m_device.deallocate_temp(m_result);
@ -750,6 +778,21 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
return (m_result != NULL); return (m_result != NULL);
} }
} }
#if defined(EIGEN_USE_SYCL)
// If there is no optimised version for SYCL, the reduction expression
// must be broken into two subexpressions and evaluated with the generic SYCL reducer on the device.
if (RunningOnSycl) {
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
if (!data) {
data = static_cast<EvaluatorPointerType>(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)));
m_result = data;
}
Op reducer(m_reducer);
internal::GenericReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
return (m_result != NULL);
}
#endif
} }
return true; return true;
} }
@ -764,7 +807,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{ {
if ((RunningOnSycl || RunningFullReduction || RunningOnGPU) && m_result) { if (( RunningFullReduction || RunningOnGPU) && m_result ) {
return *(m_result + index); return *(m_result + index);
} }
Op reducer(m_reducer); Op reducer(m_reducer);
@ -1097,12 +1140,15 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
m_device.deallocate(reducers); m_device.deallocate(reducers);
} }
EIGEN_DEVICE_FUNC typename MakePointer_<CoeffReturnType>::Type data() const { return m_result; } EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; }
EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
#if defined(EIGEN_USE_SYCL) EIGEN_DEVICE_FUNC const Device& device() const { return m_device; }
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } #ifdef EIGEN_USE_SYCL
const Device& device() const { return m_device; } // binding placeholder accessors to a command group handler for SYCL
const Dims& xprDims() const { return m_xpr_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
m_result.bind(cgh);
}
#endif #endif
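The bind() pattern used throughout this patch can be summarised with a small sketch (illustrative types, not the Eigen nodes): an evaluator that owns device memory registers its placeholder accessor with the command group handler via handler::require, and every composite evaluator simply forwards the handler to its children, so the whole expression tree is bound before the kernel is launched.

#include <CL/sycl.hpp>

// Stand-in for the placeholder accessor wrapped by Eigen's SYCL storage type.
using PlaceholderAccessorSketch =
    cl::sycl::accessor<float, 1, cl::sycl::access::mode::read_write,
                       cl::sycl::access::target::global_buffer,
                       cl::sycl::access::placeholder::true_t>;

struct LeafEvaluatorSketch {
  PlaceholderAccessorSketch acc;
  // Register this node's accessor with the command group.
  void bind(cl::sycl::handler& cgh) const { cgh.require(acc); }
};

template <typename ChildEvaluator>
struct UnaryEvaluatorSketch {
  ChildEvaluator child;
  // No memory of its own: forward the handler down the expression tree.
  void bind(cl::sycl::handler& cgh) const { child.bind(cgh); }
};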
private: private:
@ -1126,8 +1172,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
#endif #endif
#if defined(EIGEN_USE_SYCL) #if defined(EIGEN_USE_SYCL)
template < typename HostExpr_, typename FunctorExpr_, typename Tuple_of_Acc_, typename Dims_, typename Op_, typename Index_> friend class TensorSycl::internal::ReductionFunctor; template < typename Evaluator_, typename Op__> friend class TensorSycl::internal::ReductionFunctor;
template<typename CoeffReturnType_ ,typename OutAccessor_, typename HostExpr_, typename FunctorExpr_, typename Op_, typename Dims_, typename Index_, typename TupleType_> friend class TensorSycl::internal::FullReductionKernelFunctor; // SYCL needs the generic reducer when the reduction algorithm is neither an inner, outer, nor full reduction
template <typename, typename, typename> friend struct internal::GenericReducer;
#endif #endif
@ -1255,9 +1302,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
// Precomputed strides for the output tensor. // Precomputed strides for the output tensor.
array<Index, NumOutputDims> m_outputStrides; array<Index, NumOutputDims> m_outputStrides;
array<internal::TensorIntDivisor<Index>, NumOutputDims> m_fastOutputStrides; array<internal::TensorIntDivisor<Index>, NumOutputDims> m_fastOutputStrides;
// Subset of strides of the input tensor for the non-reduced dimensions.
// Indexed by output dimensions.
static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
array<Index, NumPreservedStrides> m_preservedStrides; array<Index, NumPreservedStrides> m_preservedStrides;
// Map from output to input dimension index. // Map from output to input dimension index.
array<Index, NumOutputDims> m_output_to_input_dim_map; array<Index, NumOutputDims> m_output_to_input_dim_map;
@ -1288,13 +1332,36 @@ static const bool RunningOnGPU = false;
static const bool RunningOnGPU = false; static const bool RunningOnGPU = false;
static const bool RunningOnSycl = false; static const bool RunningOnSycl = false;
#endif #endif
typename MakePointer_<CoeffReturnType>::Type m_result; EvaluatorPointerType m_result;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
};
#if defined(EIGEN_USE_SYCL) template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
const Dims m_xpr_dims; struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
#endif : public TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> {
typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Base;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Device& device) : Base(op, device){}
};
template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_>
struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Eigen::SyclDevice>
: public TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Eigen::SyclDevice> {
typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Eigen::SyclDevice> Base;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Eigen::SyclDevice& device) : Base(op, device){}
// The coeff function in the base class is the recursive method, which is not standard layout and cannot be used in a SYCL kernel.
// Therefore the coeff function is overridden for the SYCL kernel.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Base::CoeffReturnType coeff(typename Base::Index index) const {
return *(this->data() + index);
}
// The packet function in the base class is the recursive method, which is not standard layout and cannot be used in a SYCL kernel.
// Therefore the packet function is overridden for the SYCL kernel.
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Base::PacketReturnType packet(typename Base::Index index) const {
return internal::pload<typename Base::PacketReturnType>(this->data() + index);
}
}; };
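More generally, the override pattern of this specialisation is simply "read the already-evaluated result buffer directly"; a sketch under the assumption that Base exposes data(), CoeffReturnType, PacketReturnType and Index like the evaluator base above (illustrative names, not Eigen types):

#include <Eigen/Core>

template <typename Base>
struct FlatReadEvaluatorSketch : Base {
  using Base::Base;

  // Read a single coefficient straight from the evaluated result buffer.
  typename Base::CoeffReturnType coeff(typename Base::Index index) const {
    return *(this->data() + index);
  }

  // Read a whole packet from the same buffer.
  template <int LoadMode>
  typename Base::PacketReturnType packet(typename Base::Index index) const {
    return Eigen::internal::pload<typename Base::PacketReturnType>(this->data() + index);
  }
};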
} // end namespace Eigen } // end namespace Eigen

View File

@ -44,6 +44,9 @@ class TensorLazyEvaluatorReadOnly : public TensorLazyBaseEvaluator<Dimensions, t
public: public:
// typedef typename TensorEvaluator<Expr, Device>::Dimensions Dimensions; // typedef typename TensorEvaluator<Expr, Device>::Dimensions Dimensions;
typedef typename TensorEvaluator<Expr, Device>::Scalar Scalar; typedef typename TensorEvaluator<Expr, Device>::Scalar Scalar;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
typedef TensorEvaluator<Expr, Device> EvalType;
TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) { TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) {
m_dims = m_impl.dimensions(); m_dims = m_impl.dimensions();
@ -79,6 +82,8 @@ class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimension
public: public:
typedef TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> Base; typedef TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> Base;
typedef typename Base::Scalar Scalar; typedef typename Base::Scalar Scalar;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) { TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) {
} }
@ -362,6 +367,8 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>
typedef typename Derived::Scalar CoeffReturnType; typedef typename Derived::Scalar CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef typename Derived::Dimensions Dimensions; typedef typename Derived::Dimensions Dimensions;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -379,7 +386,7 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
return true; return true;
} }

View File

@ -109,6 +109,8 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -145,7 +147,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions() const { return m_dimensions; } const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -158,6 +160,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
eigen_assert(index < dimensions().TotalSize()); eigen_assert(index < dimensions().TotalSize());
Index inputIndex = 0; Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
Index idx = index / m_strides[i]; Index idx = index / m_strides[i];
index -= idx * m_strides[i]; index -= idx * m_strides[i];
@ -172,6 +175,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
inputIndex += index; inputIndex += index;
} }
} else { } else {
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
Index idx = index / m_strides[i]; Index idx = index / m_strides[i];
index -= idx * m_strides[i]; index -= idx * m_strides[i];
@ -205,6 +209,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
// local structure. // local structure.
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
values[PacketSize]; values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
values[i] = coeff(index+i); values[i] = coeff(index+i);
} }
@ -225,12 +230,14 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize); TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
/// required by sycl in order to extract the accessor #ifdef EIGEN_USE_SYCL
const TensorEvaluator<ArgType, Device> & impl() const { return m_impl; } // binding placeholder accessors to a command group handler for SYCL
/// added for sycl in order to construct the buffer from sycl device EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
ReverseDimensions functor() const { return m_reverse; } m_impl.bind(cgh);
}
#endif
protected: protected:
Dimensions m_dimensions; Dimensions m_dimensions;
@ -285,11 +292,11 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
// This code is pilfered from TensorMorphing.h // This code is pilfered from TensorMorphing.h
EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x); internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
this->coeffRef(index+i) = values[i]; this->coeffRef(index+i) = values[i];
} }
} }
}; };

View File

@ -86,12 +86,15 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
typedef TensorScanOp<Op, ArgType> XprType; typedef TensorScanOp<Op, ArgType> XprType;
typedef typename XprType::Index Index; typedef typename XprType::Index Index;
typedef const ArgType ChildType;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions; typedef DSizes<Index, NumDims> Dimensions;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self; typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;
typedef StorageMemory<Scalar, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -110,7 +113,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
m_exclusive(op.exclusive()), m_exclusive(op.exclusive()),
m_accumulator(op.accumulator()), m_accumulator(op.accumulator()),
m_size(m_impl.dimensions()[op.axis()]), m_size(m_impl.dimensions()[op.axis()]),
m_stride(1), m_stride(1), m_consume_dim(op.axis()),
m_output(NULL) { m_output(NULL) {
// Accumulating a scalar isn't supported. // Accumulating a scalar isn't supported.
@ -142,6 +145,10 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return m_stride; return m_stride;
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& consume_dim() const {
return m_consume_dim;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const {
return m_size; return m_size;
} }
@ -162,7 +169,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return m_device; return m_device;
} }
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
ScanLauncher<Self, Op, Device> launcher; ScanLauncher<Self, Op, Device> launcher;
if (data) { if (data) {
@ -171,7 +178,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
} }
const Index total_size = internal::array_prod(dimensions()); const Index total_size = internal::array_prod(dimensions());
m_output = static_cast<CoeffReturnType*>(m_device.allocate(total_size * sizeof(Scalar))); m_output = static_cast<EvaluatorPointerType>(m_device.get((Scalar*) m_device.allocate_temp(total_size * sizeof(Scalar))));
launcher(*this, m_output); launcher(*this, m_output);
return true; return true;
} }
@ -181,7 +188,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return internal::ploadt<PacketReturnType, LoadMode>(m_output + index); return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const
{ {
return m_output; return m_output;
} }
@ -196,21 +203,29 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
if (m_output != NULL) { if (m_output) {
m_device.deallocate(m_output); m_device.deallocate_temp(m_output);
m_output = NULL; m_output = NULL;
} }
m_impl.cleanup(); m_impl.cleanup();
} }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
m_output.bind(cgh);
}
#endif
protected: protected:
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
const bool m_exclusive; const bool m_exclusive;
Op m_accumulator; Op m_accumulator;
const Index m_size; const Index m_size;
Index m_stride; Index m_stride;
CoeffReturnType* m_output; Index m_consume_dim;
EvaluatorPointerType m_output;
}; };
// CPU implementation of scan // CPU implementation of scan

View File

@ -109,6 +109,8 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -130,8 +132,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
const Device& device) const Device& device)
: m_device(device), : m_device(device),
m_impl(op.expression(), device), m_impl(op.expression(), device)
m_shuffle(op.shufflePermutation())
{ {
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
const Shuffle& shuffle = op.shufflePermutation(); const Shuffle& shuffle = op.shufflePermutation();
@ -172,7 +173,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -194,6 +195,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
static PacketReturnType Run(const Self& self, Index index) { static PacketReturnType Run(const Self& self, Index index) {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
values[i] = self.coeff(index + i); values[i] = self.coeff(index + i);
} }
@ -210,6 +212,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
return self.m_impl.template packet<LoadMode>(index); return self.m_impl.template packet<LoadMode>(index);
} else { } else {
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
values[i] = self.coeff(index + i); values[i] = self.coeff(index + i);
} }
@ -330,13 +333,14 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize); TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
// required by sycl
EIGEN_STRONG_INLINE const Shuffle& shufflePermutation() const {return m_shuffle;}
// required by sycl
EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const {return m_impl;}
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
}
#endif
protected: protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex( EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
Index input_index, Index input_index,
@ -389,10 +393,8 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
array<Index, NumDims> m_inputStrides; array<Index, NumDims> m_inputStrides;
array<Index, NumDims> m_unshuffledInputStrides; array<Index, NumDims> m_unshuffledInputStrides;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
/// required by sycl
Shuffle m_shuffle;
}; };
@ -444,6 +446,7 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
internal::pstore<CoeffReturnType, PacketReturnType>(values, x); internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
EIGEN_UNROLL_LOOP
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
this->coeffRef(index+i) = values[i]; this->coeffRef(index+i) = values[i];
} }

View File

@ -37,7 +37,7 @@ struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
template<typename Strides, typename XprType> template<typename Strides, typename XprType>
struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense> struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
{ {
typedef const TensorStridingOp<Strides, XprType>& type; typedef const TensorStridingOp<Strides, XprType>EIGEN_DEVICE_REF type;
}; };
template<typename Strides, typename XprType> template<typename Strides, typename XprType>
@ -108,6 +108,8 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false, IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
@ -120,7 +122,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
}; };
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_strides(op.strides()) : m_impl(op.expression(), device)
{ {
m_dimensions = m_impl.dimensions(); m_dimensions = m_impl.dimensions();
for (int i = 0; i < NumDims; ++i) { for (int i = 0; i < NumDims; ++i) {
@ -149,9 +151,10 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
} }
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType/*data*/) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -173,6 +176,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
Index inputIndices[] = {0, 0}; Index inputIndices[] = {0, 0};
Index indices[] = {index, index + PacketSize - 1}; Index indices[] = {index, index + PacketSize - 1};
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index idx0 = indices[0] / m_outputStrides[i]; const Index idx0 = indices[0] / m_outputStrides[i];
const Index idx1 = indices[1] / m_outputStrides[i]; const Index idx1 = indices[1] / m_outputStrides[i];
@ -184,6 +188,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
inputIndices[0] += indices[0] * m_inputStrides[0]; inputIndices[0] += indices[0] * m_inputStrides[0];
inputIndices[1] += indices[1] * m_inputStrides[0]; inputIndices[1] += indices[1] * m_inputStrides[0];
} else { // RowMajor } else { // RowMajor
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index idx0 = indices[0] / m_outputStrides[i]; const Index idx0 = indices[0] / m_outputStrides[i];
const Index idx1 = indices[1] / m_outputStrides[i]; const Index idx1 = indices[1] / m_outputStrides[i];
@ -203,6 +208,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize]; EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
values[0] = m_impl.coeff(inputIndices[0]); values[0] = m_impl.coeff(inputIndices[0]);
values[PacketSize-1] = m_impl.coeff(inputIndices[1]); values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
EIGEN_UNROLL_LOOP
for (int i = 1; i < PacketSize-1; ++i) { for (int i = 1; i < PacketSize-1; ++i) {
values[i] = coeff(index+i); values[i] = coeff(index+i);
} }
@ -225,18 +231,20 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; } EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }
/// required by sycl in order to extract the accessor
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
/// required by sycl in order to extract the accessor
Strides functor() const { return m_strides; }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
}
#endif
protected: protected:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
{ {
Index inputIndex = 0; Index inputIndex = 0;
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStrides[i]; const Index idx = index / m_outputStrides[i];
inputIndex += idx * m_inputStrides[i]; inputIndex += idx * m_inputStrides[i];
@ -244,6 +252,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
} }
inputIndex += index * m_inputStrides[0]; inputIndex += index * m_inputStrides[0];
} else { // RowMajor } else { // RowMajor
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index idx = index / m_outputStrides[i]; const Index idx = index / m_outputStrides[i];
inputIndex += idx * m_inputStrides[i]; inputIndex += idx * m_inputStrides[i];
@ -258,7 +267,6 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
array<Index, NumDims> m_outputStrides; array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_inputStrides; array<Index, NumDims> m_inputStrides;
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
const Strides m_strides;
}; };
// Eval as lvalue // Eval as lvalue
@ -296,11 +304,6 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
return this->m_impl.coeffRef(this->srcCoeff(index)); return this->m_impl.coeffRef(this->srcCoeff(index));
} }
/// required by sycl in order to extract the accessor
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return this->m_impl; }
/// required by sycl in order to extract the accessor
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Strides functor() const { return this->m_strides; }
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketReturnType& x) void writePacket(Index index, const PacketReturnType& x)
{ {
@ -310,6 +313,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
Index inputIndices[] = {0, 0}; Index inputIndices[] = {0, 0};
Index indices[] = {index, index + PacketSize - 1}; Index indices[] = {index, index + PacketSize - 1};
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
EIGEN_UNROLL_LOOP
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const Index idx0 = indices[0] / this->m_outputStrides[i]; const Index idx0 = indices[0] / this->m_outputStrides[i];
const Index idx1 = indices[1] / this->m_outputStrides[i]; const Index idx1 = indices[1] / this->m_outputStrides[i];
@ -321,6 +325,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
inputIndices[0] += indices[0] * this->m_inputStrides[0]; inputIndices[0] += indices[0] * this->m_inputStrides[0];
inputIndices[1] += indices[1] * this->m_inputStrides[0]; inputIndices[1] += indices[1] * this->m_inputStrides[0];
} else { // RowMajor } else { // RowMajor
EIGEN_UNROLL_LOOP
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const Index idx0 = indices[0] / this->m_outputStrides[i]; const Index idx0 = indices[0] / this->m_outputStrides[i];
const Index idx1 = indices[1] / this->m_outputStrides[i]; const Index idx1 = indices[1] / this->m_outputStrides[i];
@ -340,6 +345,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
internal::pstore<Scalar, PacketReturnType>(values, x); internal::pstore<Scalar, PacketReturnType>(values, x);
this->m_impl.coeffRef(inputIndices[0]) = values[0]; this->m_impl.coeffRef(inputIndices[0]) = values[0];
this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1]; this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
EIGEN_UNROLL_LOOP
for (int i = 1; i < PacketSize-1; ++i) { for (int i = 1; i < PacketSize-1; ++i) {
this->coeffRef(index+i) = values[i]; this->coeffRef(index+i) = values[i];
} }

View File

@ -91,6 +91,8 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
typedef StorageMemory<CoeffReturnType, Device> Storage;
typedef typename Storage::Type EvaluatorPointerType;
enum { enum {
IsAligned = false, IsAligned = false,
@ -205,7 +207,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
return m_dimensions; return m_dimensions;
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
m_impl.evalSubExprsIfNeeded(NULL); m_impl.evalSubExprsIfNeeded(NULL);
return true; return true;
} }
@ -249,6 +251,13 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
return result; return result;
} }
#ifdef EIGEN_USE_SYCL
// binding placeholder accessors to a command group handler for SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
m_impl.bind(cgh);
}
#endif
protected: protected:
// Given the output index, finds the first index in the input tensor used to compute the trace // Given the output index, finds the first index in the input tensor used to compute the trace
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
@ -276,7 +285,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
TensorEvaluator<ArgType, Device> m_impl; TensorEvaluator<ArgType, Device> m_impl;
// Initialize the size of the trace dimension // Initialize the size of the trace dimension
Index m_traceDim; Index m_traceDim;
const Device& m_device; const Device EIGEN_DEVICE_REF m_device;
array<bool, NumInputDims> m_reduced; array<bool, NumInputDims> m_reduced;
array<Index, NumReducedDims> m_reducedDims; array<Index, NumReducedDims> m_reducedDims;
array<Index, NumOutputDims> m_outputStrides; array<Index, NumOutputDims> m_outputStrides;

View File

@ -58,9 +58,6 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
}; };
template <typename T> struct MakePointer { template <typename T> struct MakePointer {
typedef T* Type; typedef T* Type;
typedef T& RefType;
typedef T ScalarType;
}; };
typedef typename MakePointer<Scalar>::Type PointerType; typedef typename MakePointer<Scalar>::Type PointerType;
}; };
@ -80,9 +77,6 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
}; };
template <typename T> struct MakePointer { template <typename T> struct MakePointer {
typedef T* Type; typedef T* Type;
typedef T& RefType;
typedef T ScalarType;
}; };
typedef typename MakePointer<Scalar>::Type PointerType; typedef typename MakePointer<Scalar>::Type PointerType;
}; };
@ -106,10 +100,6 @@ struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
// Intermediate typedef to workaround MSVC issue. // Intermediate typedef to workaround MSVC issue.
typedef MakePointer_<T> MakePointerT; typedef MakePointer_<T> MakePointerT;
typedef typename MakePointerT::Type Type; typedef typename MakePointerT::Type Type;
typedef typename MakePointerT::RefType RefType;
typedef typename MakePointerT::ScalarType ScalarType;
}; };
typedef typename MakePointer<Scalar>::Type PointerType; typedef typename MakePointer<Scalar>::Type PointerType;
}; };
@@ -135,49 +125,49 @@ struct traits<TensorRef<PlainObjectType> >
 template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
 struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
 {
-  typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type;
+  typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
 };
 template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
 struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
 {
-  typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type;
+  typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
 };
 template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
 struct eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense>
 {
-  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
+  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>EIGEN_DEVICE_REF type;
 };
 template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
 struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense>
 {
-  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
+  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>EIGEN_DEVICE_REF type;
 };
 template<typename PlainObjectType, int Options, template <class> class MakePointer>
 struct eval<TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
 {
-  typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
+  typedef const TensorMap<PlainObjectType, Options, MakePointer>EIGEN_DEVICE_REF type;
 };
 template<typename PlainObjectType, int Options, template <class> class MakePointer>
 struct eval<const TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
 {
-  typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
+  typedef const TensorMap<PlainObjectType, Options, MakePointer>EIGEN_DEVICE_REF type;
 };
 template<typename PlainObjectType>
 struct eval<TensorRef<PlainObjectType>, Eigen::Dense>
 {
-  typedef const TensorRef<PlainObjectType>& type;
+  typedef const TensorRef<PlainObjectType>EIGEN_DEVICE_REF type;
 };
 template<typename PlainObjectType>
 struct eval<const TensorRef<PlainObjectType>, Eigen::Dense>
 {
-  typedef const TensorRef<PlainObjectType>& type;
+  typedef const TensorRef<PlainObjectType>EIGEN_DEVICE_REF type;
 };
 // TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector.
@@ -189,50 +179,50 @@ template<typename T, int n=1, typename PlainObject = void> struct nested
 template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
 struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
 {
-  typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type;
+  typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>EIGEN_DEVICE_REF type;
 };
 template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
 struct nested<const Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
 {
-  typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type;
+  typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>EIGEN_DEVICE_REF type;
 };
 template <typename Scalar_, typename Dimensions, int Options, typename IndexType_>
 struct nested<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >
 {
-  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
+  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>EIGEN_DEVICE_REF type;
 };
 template <typename Scalar_, typename Dimensions, int Options, typename IndexType_>
 struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >
 {
-  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
+  typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>EIGEN_DEVICE_REF type;
 };
 template <typename PlainObjectType, int Options, template <class> class MakePointer>
 struct nested<TensorMap<PlainObjectType, Options, MakePointer> >
 {
-  typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
+  typedef const TensorMap<PlainObjectType, Options, MakePointer>EIGEN_DEVICE_REF type;
 };
 template <typename PlainObjectType, int Options, template <class> class MakePointer>
 struct nested<const TensorMap<PlainObjectType, Options, MakePointer> >
 {
-  typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
+  typedef const TensorMap<PlainObjectType, Options, MakePointer>EIGEN_DEVICE_REF type;
 };
 template <typename PlainObjectType>
 struct nested<TensorRef<PlainObjectType> >
 {
-  typedef const TensorRef<PlainObjectType>& type;
+  typedef const TensorRef<PlainObjectType>EIGEN_DEVICE_REF type;
 };
 template <typename PlainObjectType>
 struct nested<const TensorRef<PlainObjectType> >
 {
-  typedef const TensorRef<PlainObjectType>& type;
+  typedef const TensorRef<PlainObjectType>EIGEN_DEVICE_REF type;
 };
 } // end namespace internal

View File

@@ -91,24 +91,6 @@ class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows,
       m_padding_left(padding_left), m_padding_right(padding_right),
       m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
-#ifdef EIGEN_USE_SYCL // this is work around for sycl as Eigen could not use c++11 deligate constructor feature
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
-                                                            DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
-                                                            DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
-                                                            DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
-                                                            bool padding_explicit, DenseIndex padding_top_z, DenseIndex padding_bottom_z,
-                                                            DenseIndex padding_top, DenseIndex padding_bottom, DenseIndex padding_left,
-                                                            DenseIndex padding_right, PaddingType padding_type, Scalar padding_value)
-    : m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
-      m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
-      m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
-      m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides),
-      m_col_inflate_strides(col_inflate_strides), m_padding_explicit(padding_explicit), m_padding_top_z(padding_top_z),
-      m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom), m_padding_left(padding_left),
-      m_padding_right(padding_right), m_padding_type(padding_type), m_padding_value(padding_value) {}
-#endif
   EIGEN_DEVICE_FUNC
   DenseIndex patch_planes() const { return m_patch_planes; }
   EIGEN_DEVICE_FUNC
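The block removed above was a workaround for SYCL device compilers that could not use C++11 delegating constructors, so the full member-initializer list had to be spelled out a second time; now that the expression is no longer rebuilt on the device (see the m_op removal further down), the duplicate constructor is presumably unnecessary. For reference, a delegating constructor is simply one constructor forwarding to another, as in this generic illustration (not Eigen code):

    // Generic C++11 delegating-constructor example, for illustration only.
    struct Example {
      Example(int a, int b) : x(a), y(b) {}
      Example(int a) : Example(a, 0) {}   // delegates to the two-argument constructor
      int x, y;
    };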
@@ -195,6 +177,8 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     IsAligned = false,
@@ -205,16 +189,9 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
     CoordAccess = false,
     RawAccess = false
   };
-#ifdef __SYCL_DEVICE_ONLY__
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType op, const Device& device)
-#else
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
-#endif
-    : m_impl(op.expression(), device)
-#ifdef EIGEN_USE_SYCL
-    , m_op(op)
-#endif
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) :
+    m_impl(op.expression(), device)
   {
     EIGEN_STATIC_ASSERT((NumDims >= 5), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -368,9 +345,10 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
     }
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
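evalSubExprsIfNeeded (and data() below) now traffic in EvaluatorPointerType rather than a raw Scalar*. StorageMemory is the abstraction behind that typedef: on ordinary devices it is expected to collapse to a plain pointer, while the SYCL specialization can carry an accessor or virtual-pointer wrapper. A rough sketch of the idea, assuming a hypothetical SyclPointerWrapper stand-in rather than the actual SYCL backend type:

    // Rough sketch, for illustration only; SyclPointerWrapper is a hypothetical stand-in
    // for whatever accessor/virtual-pointer wrapper the SYCL backend provides.
    template <typename T, typename Device>
    struct StorageMemory { typedef T* Type; };   // default: a plain pointer

    #ifdef EIGEN_USE_SYCL
    template <typename T>
    struct StorageMemory<T, Eigen::SyclDevice> { typedef SyclPointerWrapper<T> Type; };
    #endif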
@@ -531,14 +509,10 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
     return TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
   }
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-#ifdef EIGEN_USE_SYCL
-  // Required by SYCL in order to construct the expression on the device
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& xpr() const { return m_op; }
-#endif
   Index planePaddingTop() const { return m_planePaddingTop; }
   Index rowPaddingTop() const { return m_rowPaddingTop; }
@@ -556,10 +530,17 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
   Index rowInflateStride() const { return m_row_inflate_strides; }
   Index colInflateStride() const { return m_col_inflate_strides; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif
 protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
   {
     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < PacketSize; ++i) {
       values[i] = coeff(index+i);
     }
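EIGEN_UNROLL_LOOP is the new macro referred to in the commit message for controlling loop unrolling: the intent is that it expands to an unroll pragma where the compiler supports one and to nothing otherwise, so the packet loops stay portable. A plausible definition, with the compiler guards being an assumption rather than the exact ones Eigen uses:

    // Plausible definition, for illustration; the real compiler guards may differ.
    #ifndef EIGEN_UNROLL_LOOP
    #if defined(__clang__) || defined(__INTEL_COMPILER)
    #define EIGEN_UNROLL_LOOP _Pragma("unroll")
    #else
    #define EIGEN_UNROLL_LOOP
    #endif
    #endif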
@@ -635,10 +616,6 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
   TensorEvaluator<ArgType, Device> m_impl;
-#ifdef EIGEN_USE_SYCL
-  // Required by SYCL in order to construct the expression on the device
-  XprType m_op;
-#endif
 };
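Dropping m_op and xpr() is the flip side of the bind() member added above: instead of shipping the expression object to the device and rebuilding the evaluator there, the already-constructed evaluator is copied into the SYCL kernel and only needs its placeholder accessors registered with the command group handler. A hypothetical sketch of the bind-then-launch pattern (the function and call names here are illustrative, not the actual SYCL executor code):

    // Hypothetical sketch of binding an evaluator before launching a SYCL kernel.
    template <typename Evaluator>
    void launch(const Eigen::SyclDevice& dev, Evaluator evaluator, size_t range) {
      dev.sycl_queue().submit([&](cl::sycl::handler& cgh) {
        evaluator.bind(cgh);   // attach placeholder accessors to this command group
        cgh.parallel_for(cl::sycl::range<1>(range),
                         [evaluator](cl::sycl::item<1> item) {
                           // the evaluator was captured by value; no expression rebuild needed
                           evaluator.evalScalar(static_cast<Eigen::Index>(item.get_id(0)));
                         });
      });
    }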

View File

@@ -527,26 +527,32 @@ static void test_execute_generator_op(Device d)
   }
 }
+#ifdef EIGEN_DONT_VECTORIZE
+#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
+#else
+#define VECTORIZABLE(VAL) VAL
+#endif
 #define CALL_SUBTEST_PART(PART) \
   CALL_SUBTEST_##PART
 #define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, false, ColMajor>(default_device))); \
   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, true, ColMajor>(default_device))); \
-  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, true, false, ColMajor>(default_device))); \
-  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, true, true, ColMajor>(default_device))); \
+  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), false, ColMajor>(default_device))); \
+  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), true, ColMajor>(default_device))); \
   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, false, RowMajor>(default_device))); \
   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, true, RowMajor>(default_device))); \
-  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, true, false, RowMajor>(default_device))); \
-  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, true, true, RowMajor>(default_device))); \
+  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), false, RowMajor>(default_device))); \
+  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), true, RowMajor>(default_device))); \
   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, false, ColMajor>(tp_device))); \
   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, true, ColMajor>(tp_device))); \
-  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, true, false, ColMajor>(tp_device))); \
-  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, true, true, ColMajor>(tp_device))); \
+  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), false, ColMajor>(tp_device))); \
+  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), true, ColMajor>(tp_device))); \
   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, false, RowMajor>(tp_device))); \
   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, true, RowMajor>(tp_device))); \
-  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, true, false, RowMajor>(tp_device))); \
-  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, true, true, RowMajor>(tp_device)))
+  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), false, RowMajor>(tp_device))); \
+  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), true, RowMajor>(tp_device)))
 EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
   Eigen::DefaultDevice default_device;
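The VECTORIZABLE macro keeps the Vectorizable template argument honest in builds where vectorization is disabled: when EIGEN_DONT_VECTORIZE is defined (conventionally to 1 via -DEIGEN_DONT_VECTORIZE), VECTORIZABLE(true) collapses to false, so those sub-tests fall back to the scalar code path instead of instantiating a packet path that cannot exist. A worked expansion, under that assumption about how the macro is defined on the command line:

    // With -DEIGEN_DONT_VECTORIZE (defined to 1):
    //   VECTORIZABLE(true)  ->  !EIGEN_DONT_VECTORIZE && true  ->  !1 && true  ->  false
    // Without EIGEN_DONT_VECTORIZE:
    //   VECTORIZABLE(true)  ->  true
    // So an instantiation such as
    //   NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), false, ColMajor>(default_device)
    // silently degrades to the non-vectorized variant in scalar-only builds.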

View File

@@ -51,7 +51,8 @@ static void test_static_reshape() {
   // New dimensions: [2, 3, 7]
   Eigen::IndexList<type2index<2>, type2index<3>, type2index<7>> dim;
-  Tensor<float, 3> reshaped = tensor.reshape(dim);
+  Tensor<float, 3> reshaped = tensor.reshape(static_cast<Eigen::DSizes<long,3>>(dim));
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 3; ++j) {