Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-04-22 17:49:36 +08:00)
[SYCL] This PR adds the minimum modifications to the Eigen unsupported module required to run it on devices supporting SYCL.

* Abstracting the pointer type so that both SYCL memory and raw pointers can be captured.
* Converting the SYCL virtual pointer to SYCL device memory inside the Eigen evaluator classes.
* Binding the SYCL placeholder accessors to the command group handler via a bind() method on each Eigen evaluator node.
* Adding a SYCL macro for controlling loop unrolling.
* Modifying TensorDeviceSycl.h and the SYCL executor method to adopt the above changes.

A short sketch of the per-evaluator pattern these changes introduce is given below, just before the diff.
parent 16a56b2ddd
commit 7d08fa805a
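The following is a minimal, hedged sketch of the pattern the commit applies to each evaluator it touches; it is not the actual Eigen code. The names StorageMemory, EvaluatorPointerType, EIGEN_USE_SYCL and bind are taken from the hunks below, while the sketch namespace, the toy Evaluator and the float CoeffReturnType are illustrative stand-ins.

// Hedged sketch of the per-evaluator changes, assuming a host build where
// StorageMemory<T, Device>::Type is just T*; a SYCL backend would specialize it.
#include <cstddef>

namespace sketch {

// Host-side storage is a plain pointer; the SYCL specialization would make
// Type a device-memory / placeholder-accessor wrapper instead of T*.
template <typename T, typename Device>
struct StorageMemory { typedef T* Type; };

template <typename ArgEvaluator, typename Device>
struct Evaluator {
  typedef float CoeffReturnType;                          // stand-in scalar type
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;    // replaces bare Scalar*

  // Signatures that used to take or return Scalar* now use the abstracted type.
  bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // Placeholder accessors must be bound to the command group handler before a
  // kernel is enqueued; each evaluator forwards bind() to its nested evaluators.
  void bind(cl::sycl::handler &cgh) const { m_impl.bind(cgh); }
#endif

  ArgEvaluator m_impl;
};

}  // namespace sketch

On the host the abstraction collapses back to a raw pointer, so non-SYCL builds keep their existing behaviour; per the commit message, the SYCL specialization of StorageMemory swaps in device memory, and bind() attaches the placeholder accessors to the command group handler before execution.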
@@ -37,7 +37,7 @@ struct traits<TensorIndexTupleOp<XprType> > : public traits<XprType>
 template<typename XprType>
 struct eval<TensorIndexTupleOp<XprType>, Eigen::Dense>
 {
-  typedef const TensorIndexTupleOp<XprType>& type;
+  typedef const TensorIndexTupleOp<XprType>EIGEN_DEVICE_REF type;
 };
 
 template<typename XprType>
@@ -82,6 +82,8 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
 
   typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
   static const int NumDims = internal::array_size<Dimensions>::value;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
 
   enum {
     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
@@ -100,7 +102,7 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
     return m_impl.dimensions();
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -118,11 +120,11 @@ struct TensorEvaluator<const TensorIndexTupleOp<ArgType>, Device>
     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, 1);
   }
 
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
 
 #ifdef EIGEN_USE_SYCL
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const {
-    return m_impl;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
   }
 #endif
 
@@ -154,7 +156,7 @@ struct traits<TensorTupleReducerOp<ReduceOp, Dims, XprType> > : public traits<Xp
 template<typename ReduceOp, typename Dims, typename XprType>
 struct eval<TensorTupleReducerOp<ReduceOp, Dims, XprType>, Eigen::Dense>
 {
-  typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>& type;
+  typedef const TensorTupleReducerOp<ReduceOp, Dims, XprType>EIGEN_DEVICE_REF type;
 };
 
 template<typename ReduceOp, typename Dims, typename XprType>
@@ -216,6 +218,9 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
   typedef typename TensorEvaluator<const TensorIndexTupleOp<ArgType> , Device>::Dimensions InputDimensions;
   static const int NumDims = internal::array_size<InputDimensions>::value;
   typedef array<Index, NumDims> StrideDims;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
+  typedef StorageMemory<TupleType, Device> TupleStorageMem;
 
   enum {
     IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
@@ -231,9 +236,6 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
       : m_orig_impl(op.expression(), device),
         m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device),
         m_return_dim(op.return_dim())
-#ifdef EIGEN_USE_SYCL
-        ,m_device(device)
-#endif
   {
     gen_strides(m_orig_impl.dimensions(), m_strides);
     if (Layout == static_cast<int>(ColMajor)) {
@@ -243,14 +245,17 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
       const Index total_size = internal::array_prod(m_orig_impl.dimensions());
       m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size;
     }
-    m_stride_div = (m_return_dim >= 0) ? m_strides[m_return_dim] : 1;
+    // If m_return_dim is not a valid index, returns 1 or this can crash on Windows.
+    m_stride_div = ((m_return_dim >= 0) &&
+                    (m_return_dim < static_cast<Index>(m_strides.size())))
+                   ? m_strides[m_return_dim] : 1;
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
     return m_impl.dimensions();
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -263,15 +268,12 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
     return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div;
   }
 
-#ifndef EIGEN_USE_SYCL
-  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-#else // following functions are required by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TupleType* data() const { return m_impl.data(); }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index return_dim() const {return m_return_dim;}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StrideDims& strides() const {return m_strides;}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride_mod() const {return m_stride_mod;}
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride_div() const {return m_stride_div;}
-  const Device& device() const{return m_device;}
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+#ifdef EIGEN_USE_SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+    m_orig_impl.bind(cgh);
+  }
 #endif
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
@@ -312,9 +314,6 @@ struct TensorEvaluator<const TensorTupleReducerOp<ReduceOp, Dims, ArgType>, Devi
   StrideDims m_strides;
   Index m_stride_mod;
   Index m_stride_div;
-#ifdef EIGEN_USE_SYCL
-  const Device& m_device;
-#endif
 };
 
 } // end namespace Eigen
@@ -97,6 +97,8 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
 
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   static const int NumDims = XprType::NumDims;
@@ -136,7 +138,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
     return m_rightImpl.dimensions();
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
     eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
     m_leftImpl.evalSubExprsIfNeeded(NULL);
     // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non
@@ -154,6 +156,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
     m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
+
     const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned;
     const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? Aligned : Unaligned;
     m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i));
@@ -199,13 +202,15 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
       m_leftImpl.writeBlock(*block);
     }
   }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_leftImpl.bind(cgh);
+    m_rightImpl.bind(cgh);
+  }
+#endif
 
-  /// required by sycl in order to extract the accessor
-  const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
-  /// required by sycl in order to extract the accessor
-  const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
-
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_leftImpl.data(); }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_leftImpl.data(); }
 
  private:
   TensorEvaluator<LeftArgType, Device> m_leftImpl;
@@ -841,7 +841,7 @@ struct TensorBlockView {
   const Scalar* data() const { return m_data; }
 
  private:
-  const Device& m_device;
+  const Device EIGEN_DEVICE_REF m_device;
   Dimensions m_block_sizes, m_block_strides;
   const Scalar* m_data; // Not owned.
   Scalar* m_allocated_data; // Owned.
@@ -37,7 +37,7 @@ struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType
 template<typename Broadcast, typename XprType>
 struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense>
 {
-  typedef const TensorBroadcastingOp<Broadcast, XprType>& type;
+  typedef const TensorBroadcastingOp<Broadcast, XprType> EIGEN_DEVICE_REF type;
 };
 
 template<typename Broadcast, typename XprType>
@@ -105,7 +105,11 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout;
   bool isCopy, nByOne, oneByN;
+  public:
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
 
   enum {
     IsAligned = true,
@@ -205,7 +209,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -238,6 +242,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   // TODO: attempt to speed this up. The integer divisions and modulo are slow
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexColMajor(Index index) const {
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = NumDims - 1; i > 0; --i) {
       const Index idx = index / m_outputStrides[i];
       if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -272,6 +277,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexRowMajor(Index index) const {
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < NumDims - 1; ++i) {
       const Index idx = index / m_outputStrides[i];
       if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -376,6 +382,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       values[0] = m_impl.coeff(inputIndex);
       return internal::pload1<PacketReturnType>(values);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
         if (outputOffset + cur < m_outputStrides[endDim]) {
           values[i] = m_impl.coeff(inputIndex);
@@ -410,6 +417,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       return m_impl.template packet<Unaligned>(inputIndex);
     } else {
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < PacketSize; ++i) {
         if (inputIndex > m_inputStrides[dim]-1) {
           inputIndex = 0;
@@ -441,6 +449,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       values[0] = m_impl.coeff(inputIndex);
       return internal::pload1<PacketReturnType>(values);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
         if (outputOffset + cur < m_outputStrides[dim]) {
           values[i] = m_impl.coeff(inputIndex);
@@ -465,6 +474,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     const Index originalIndex = index;
 
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = NumDims - 1; i > 0; --i) {
       const Index idx = index / m_outputStrides[i];
       if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -500,6 +510,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     } else {
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       values[0] = m_impl.coeff(inputIndex);
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < PacketSize; ++i) {
         if (innermostLoc + i < m_impl.dimensions()[0]) {
           values[i] = m_impl.coeff(inputIndex+i);
@@ -521,6 +532,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     const Index originalIndex = index;
 
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < NumDims - 1; ++i) {
       const Index idx = index / m_outputStrides[i];
       if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -556,6 +568,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     } else {
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       values[0] = m_impl.coeff(inputIndex);
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < PacketSize; ++i) {
         if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) {
           values[i] = m_impl.coeff(inputIndex+i);
@@ -572,6 +585,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   costPerCoeff(bool vectorized) const {
     double compute_cost = TensorOpCost::AddCost<Index>();
     if (!isCopy && NumDims > 0) {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 1; i > 0; --i) {
         compute_cost += TensorOpCost::DivCost<Index>();
         if (internal::index_statically_eq<Broadcast>(i, 1)) {
@@ -845,12 +859,17 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       }
     }
 
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
 
   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
 
   Broadcast functor() const { return m_broadcast; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif
  private:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void BroadcastBlock(
       const Dimensions& input_block_sizes,
@@ -875,8 +894,8 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   }
 
  protected:
-  const Device& m_device;
-  const Broadcast m_broadcast;
+  const Device EIGEN_DEVICE_REF m_device;
+  const typename internal::remove_reference<Broadcast>::type m_broadcast;
   Dimensions m_dimensions;
   array<Index, NumDims> m_outputStrides;
   array<Index, NumDims> m_inputStrides;
@@ -38,7 +38,7 @@ struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
 template<DenseIndex DimId, typename XprType>
 struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
 {
-  typedef const TensorChippingOp<DimId, XprType>& type;
+  typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
 };
 
 template<DenseIndex DimId, typename XprType>
@@ -139,7 +139,8 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
 
   enum {
     // Alignment can't be guaranteed at compile time since it depends on the
@@ -169,7 +170,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
       OutputTensorBlock;
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device), m_offset(op.offset())
+      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
   {
     EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
     eigen_assert(NumInputDims > m_dim.actualDim());
@@ -218,7 +219,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -243,6 +244,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
       eigen_assert(m_stride == 1);
       Index inputIndex = index * m_inputStride + m_inputOffset;
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < PacketSize; ++i) {
         values[i] = m_impl.coeff(inputIndex);
         inputIndex += m_inputStride;
@@ -262,6 +264,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     } else {
       // Cross the stride boundary. Fallback to slow path.
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < PacketSize; ++i) {
         values[i] = coeff(index);
         ++index;
@@ -349,26 +352,20 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
     m_impl.block(&input_block);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
-    CoeffReturnType* result = const_cast<CoeffReturnType*>(m_impl.data());
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
+    typename Storage::Type result = constCast(m_impl.data());
     if (IsOuterChipping && result) {
       return result + m_inputOffset;
     } else {
       return NULL;
     }
   }
-
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex dimId() const {
-    return m_dim.actualDim();
-  }
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const DenseIndex& offset() const {
-    return m_offset;
-  }
-  /// required by sycl in order to extract the accessor
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif
 
  protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
@@ -399,10 +396,7 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   DSizes<Index, NumInputDims> m_inputStrides;
   TensorEvaluator<ArgType, Device> m_impl;
   const internal::DimensionId<DimId> m_dim;
-  const Device& m_device;
-  // required by sycl
-  const DenseIndex m_offset;
+  const Device EIGEN_DEVICE_REF m_device;
 
 };
 
 
@@ -466,6 +460,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
       Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < PacketSize; ++i) {
         this->m_impl.coeffRef(inputIndex) = values[i];
         inputIndex += this->m_inputStride;
@@ -484,6 +479,7 @@ struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
       // Cross stride boundary. Fallback to slow path.
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
       internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < PacketSize; ++i) {
         this->coeffRef(index) = values[i];
         ++index;
@@ -119,6 +119,8 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   enum {
     IsAligned = false,
     PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
@@ -181,7 +183,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
   // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear?
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
   {
     m_leftImpl.evalSubExprsIfNeeded(NULL);
     m_rightImpl.evalSubExprsIfNeeded(NULL);
@@ -219,11 +221,13 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
     Index left_index;
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
       left_index = subs[0];
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < NumDims; ++i) {
         left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
       }
     } else {
       left_index = subs[NumDims - 1];
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 2; i >= 0; --i) {
         left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
       }
@@ -235,11 +239,13 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
     Index right_index;
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
       right_index = subs[0];
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < NumDims; ++i) {
         right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
       }
     } else {
       right_index = subs[NumDims - 1];
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 2; i >= 0; --i) {
         right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
       }
@@ -257,6 +263,7 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
     eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
 
     EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < packetSize; ++i) {
       values[i] = coeff(index+i);
     }
@@ -279,13 +286,15 @@ struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgTy
            TensorOpCost(0, 0, compute_cost);
   }
 
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
-  /// required by sycl in order to extract the accessor
-  const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
-  /// required by sycl in order to extract the accessor
-  const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
-  /// required by sycl in order to extract the accessor
-  const Axis& axis() const { return m_axis; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_leftImpl.bind(cgh);
+    m_rightImpl.bind(cgh);
+  }
+#endif
 
 protected:
   Dimensions m_dimensions;
@@ -433,6 +433,8 @@ struct TensorContractionEvaluatorBase
   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+  typedef StorageMemory<Scalar, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
 
   enum {
     IsAligned = true,
@@ -453,6 +455,9 @@ struct TensorContractionEvaluatorBase
   typedef typename internal::conditional<
     static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;
 
+  typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluatorType;
+  typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluatorType;
+
   static const int LDims =
     internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;
   static const int RDims =
@@ -653,14 +658,14 @@ struct TensorContractionEvaluatorBase
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar * data) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
     m_leftImpl.evalSubExprsIfNeeded(NULL);
     m_rightImpl.evalSubExprsIfNeeded(NULL);
     if (data) {
       evalTo(data);
       return false;
     } else {
-      m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
+      m_result = static_cast<EvaluatorPointerType>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));
       evalTo(m_result);
       return true;
     }
@@ -934,7 +939,7 @@ struct TensorContractionEvaluatorBase
     return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return m_result; }
 
 protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void EnableXSMMIfPossible(const array<IndexPair<Index>, ContractDims>& eval_op_indices) {
@@ -1169,9 +1174,9 @@
 
   TensorEvaluator<EvalLeftArgType, Device> m_leftImpl;
   TensorEvaluator<EvalRightArgType, Device> m_rightImpl;
-  const Device& m_device;
+  const Device EIGEN_DEVICE_REF m_device;
   OutputKernelType m_output_kernel;
-  Scalar* m_result;
+  EvaluatorPointerType m_result;
   bool m_can_use_xsmm;
 };
 
@@ -59,6 +59,13 @@ struct CoeffLoader {
     return m_tensor.template packet<LoadMode>(index);
   }
 
+#ifdef EIGEN_USE_SYCL
+  // The placeholder accessors require to be bound to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_tensor.bind(cgh);
+  }
+#endif
+
  private:
   const Tensor m_tensor;
 };
@@ -87,6 +94,13 @@ struct CoeffLoader<Tensor, true, MakePointer_> {
   {
     return internal::ploadt_ro<typename Tensor::PacketReturnType, LoadMode>(m_data + index);
   }
+
+#ifdef EIGEN_USE_SYCL
+  // The placeholder accessors require to be bound to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_data.bind(cgh);
+  }
+#endif
  private:
   typedef typename Tensor::Scalar Scalar;
 
@@ -139,6 +153,7 @@ class SimpleTensorContractionMapper {
     EIGEN_UNUSED_VARIABLE(left); // annoying bug in g++8.1: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85963
     Index nocontract_val = left ? row : col;
     Index linidx = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
       const Index idx = nocontract_val / m_ij_strides[i];
       linidx += idx * m_nocontract_strides[i];
@@ -155,6 +170,7 @@ class SimpleTensorContractionMapper {
 
     Index contract_val = left ? col : row;
     if(array_size<contract_t>::value > 0) {
+      EIGEN_UNROLL_LOOP
       for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
         const Index idx = contract_val / m_k_strides[i];
         linidx += idx * m_contract_strides[i];
@@ -179,6 +195,7 @@ class SimpleTensorContractionMapper {
     Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
     Index linidx[2] = {0, 0};
     if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
+      EIGEN_UNROLL_LOOP
      for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
        const Index idx0 = nocontract_val[0] / m_ij_strides[i];
        const Index idx1 = nocontract_val[1] / m_ij_strides[i];
@@ -199,6 +216,7 @@ class SimpleTensorContractionMapper {
 
     Index contract_val[2] = {left ? col : row, left ? col : row + distance};
     if (array_size<contract_t>::value> 0) {
+      EIGEN_UNROLL_LOOP
       for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
         const Index idx0 = contract_val[0] / m_k_strides[i];
         const Index idx1 = contract_val[1] / m_k_strides[i];
@@ -230,6 +248,13 @@ class SimpleTensorContractionMapper {
     return ((side == Lhs) && inner_dim_contiguous && array_size<contract_t>::value > 0) ? m_contract_strides[0] : 1;
   }
 
+#ifdef EIGEN_USE_SYCL
+  // The placeholder accessors require to be bound to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_tensor.bind(cgh);
+  }
+#endif
+
   const CoeffLoader<Tensor, Tensor::RawAccess, MakePointer_>& tensor() const {
     return m_tensor;
   }
@@ -302,6 +327,7 @@ class BaseTensorContractionMapper : public SimpleTensorContractionMapper<Scalar,
     EIGEN_ALIGN_MAX Scalar data[packet_size];
 
     data[0] = this->m_tensor.coeff(first);
+    EIGEN_UNROLL_LOOP
     for (Index k = 1; k < packet_size - 1; k += 2) {
       const IndexPair<Index> internal_pair = this->computeIndexPair(i + k, j, 1);
       data[k] = this->m_tensor.coeff(internal_pair.first);
@@ -472,6 +498,13 @@ class TensorContractionSubMapper {
     return false;
   }
 
+#ifdef EIGEN_USE_SYCL
+  // The placeholder accessors require to be bound to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_base_mapper.bind(cgh);
+  }
+#endif
+
   const ParentMapper& base_mapper() const { return m_base_mapper; }
   Index vert_offset() const { return m_vert_offset; }
   Index horiz_offset() const { return m_horiz_offset; }
@@ -515,6 +548,22 @@ class TensorContractionInputMapper
 };
 
 
+template <typename T> struct TensorContractionInputMapperTrait;
+
+template<typename Scalar_, typename Index_, int side_,
+         typename Tensor_,
+         typename nocontract_t_, typename contract_t_,
+         int packet_size_,
+         bool inner_dim_contiguous_, bool inner_dim_reordered_, int Alignment_, template <class> class MakePointer_>
+struct TensorContractionInputMapperTrait<TensorContractionInputMapper<Scalar_, Index_, side_, Tensor_,
+                                         nocontract_t_, contract_t_, packet_size_, inner_dim_contiguous_,
+                                         inner_dim_reordered_, Alignment_, MakePointer_> > {
+
+  typedef Tensor_ XprType;
+  static const bool inner_dim_contiguous = inner_dim_contiguous_;
+  static const bool inner_dim_reordered = inner_dim_reordered_;
+};
+
 
 } // end namespace internal
 } // end namespace Eigen
@@ -129,6 +129,7 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
     typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
     internal::scalar_cast_op<SrcType, TgtType> converter;
     EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < TgtPacketSize; ++i) {
       values[i] = converter(m_impl.coeff(index+i));
     }
@@ -164,15 +165,15 @@ class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprT
     typename XprType::Nested m_xpr;
 };
 
-template <bool SameType, typename Eval, typename Scalar> struct ConversionSubExprEval {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) {
+template <bool SameType, typename Eval, typename EvalPointerType> struct ConversionSubExprEval {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
     impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
 };
 
-template <typename Eval, typename Scalar> struct ConversionSubExprEval<true, Eval, Scalar> {
-  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) {
+template <typename Eval, typename EvalPointerType> struct ConversionSubExprEval<true, Eval, EvalPointerType> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) {
     return impl.evalSubExprsIfNeeded(data);
   }
 };
@@ -207,6 +208,7 @@ struct PacketConv {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
     internal::scalar_cast_op<SrcType, TargetType> converter;
     EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < PacketSize; ++i) {
       values[i] = converter(impl.coeff(index+i));
     }
@@ -267,10 +269,18 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
   typedef typename PacketType<SrcType, Device>::type PacketSourceType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
   static const bool IsSameType = internal::is_same<TargetType, SrcType>::value;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
 
   enum {
     IsAligned = false,
-    PacketAccess = true,
+    PacketAccess =
+    #ifndef EIGEN_USE_SYCL
+                   true,
+    #else
+                   TensorEvaluator<ArgType, Device>::PacketAccess &
+                   internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
+    #endif
     BlockAccess = false,
     PreferBlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
@@ -284,9 +294,9 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
   {
-    return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, Scalar>::run(m_impl, data);
+    return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
@@ -330,10 +340,16 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
     }
   }
 
-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
 
   /// required by sycl in order to extract the sycl accessor
   const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif
 
   protected:
     TensorEvaluator<ArgType, Device> m_impl;
@ -303,6 +303,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
typedef StorageMemory<Scalar, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned & TensorEvaluator<KernelArgType, Device>::IsAligned,
|
||||||
@ -469,7 +471,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
PacketSize));
|
PacketSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
|
||||||
@ -525,7 +527,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
m_local_kernel = false;
|
m_local_kernel = false;
|
||||||
} else {
|
} else {
|
||||||
size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
|
size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar);
|
||||||
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
|
Scalar* local = (Scalar*)m_device.allocate_temp(kernel_sz);
|
||||||
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
typedef TensorEvalToOp<const KernelArgType> EvalTo;
|
||||||
EvalTo evalToTmp(local, m_kernelArg);
|
EvalTo evalToTmp(local, m_kernelArg);
|
||||||
const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
|
const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
|
||||||
@ -548,7 +550,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
|||||||
KernelArgType m_kernelArg;
|
KernelArgType m_kernelArg;
|
||||||
const Scalar* m_kernel;
|
const Scalar* m_kernel;
|
||||||
bool m_local_kernel;
|
bool m_local_kernel;
|
||||||
const Device& m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
};
|
};
|
||||||
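Note: members such as m_device (and later m_fft) switch from C++ references to EIGEN_DEVICE_REF because a SYCL kernel functor has to carry its state by value; a host reference captured into device code would be meaningless there. A sketch of the idea with a hypothetical fallback definition (the real macro lives in Eigen's tensor macro header and may differ in detail):

// Hypothetical stand-in definition, mirroring how the macro is used above:
// keep a reference on host-only builds, store by value for SYCL kernels.
#ifndef EIGEN_DEVICE_REF
#ifdef EIGEN_USE_SYCL
#define EIGEN_DEVICE_REF
#else
#define EIGEN_DEVICE_REF &
#endif
#endif

struct DeviceHolder {
  const int EIGEN_DEVICE_REF m_device;  // 'const int&' on host, 'const int' under SYCL
  explicit DeviceHolder(const int &d) : m_device(d) {}
};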
|
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ struct traits<TensorCustomUnaryOp<CustomUnaryFunc, XprType> >
|
|||||||
template<typename CustomUnaryFunc, typename XprType>
|
template<typename CustomUnaryFunc, typename XprType>
|
||||||
struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
|
struct eval<TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Eigen::Dense>
|
||||||
{
|
{
|
||||||
typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>& type;
|
typedef const TensorCustomUnaryOp<CustomUnaryFunc, XprType>EIGEN_DEVICE_REF type;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename CustomUnaryFunc, typename XprType>
|
template<typename CustomUnaryFunc, typename XprType>
|
||||||
@ -88,7 +88,9 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
|
|||||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
|
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
@ -108,20 +110,20 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
|
||||||
if (data) {
|
if (data) {
|
||||||
evalTo(data);
|
evalTo(data);
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
m_result = static_cast<PointerT>(
|
m_result = static_cast<EvaluatorPointerType>(m_device.get( (CoeffReturnType*)
|
||||||
m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
|
m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))));
|
||||||
evalTo(m_result);
|
evalTo(m_result);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
if (m_result != NULL) {
|
if (m_result) {
|
||||||
m_device.deallocate_temp(m_result);
|
m_device.deallocate_temp(m_result);
|
||||||
m_result = NULL;
|
m_result = NULL;
|
||||||
}
|
}
|
||||||
@ -141,22 +143,25 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
|
|||||||
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
|
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; }
|
||||||
|
|
||||||
#ifdef EIGEN_USE_SYCL
|
#ifdef EIGEN_USE_SYCL
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_result.bind(cgh);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
|
EIGEN_DEVICE_FUNC void evalTo(EvaluatorPointerType data) {
|
||||||
TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions);
|
TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(m_device.get(data), m_dimensions);
|
||||||
m_op.func().eval(m_op.expression(), result, m_device);
|
m_op.func().eval(m_op.expression(), result, m_device);
|
||||||
}
|
}
|
||||||
|
|
||||||
Dimensions m_dimensions;
|
Dimensions m_dimensions;
|
||||||
const ArgType m_op;
|
const ArgType m_op;
|
||||||
const Device& m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
PointerT m_result;
|
EvaluatorPointerType m_result;
|
||||||
};
|
};
|
||||||
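Note: the custom-op evaluator now routes its scratch buffer through allocate_temp/deallocate_temp and immediately rewraps the raw allocation with m_device.get(), so on SYCL the stored m_result is already an accessor-backed EvaluatorPointerType rather than a raw address. A host-side sketch of that lifecycle, assuming a device type exposing the allocate_temp/deallocate_temp/get hooks used above:

#include <cstddef>

template <typename Device>
float* make_scratch(const Device &dev, std::size_t count) {
  // allocate_temp returns void*; the evaluator casts and then wraps it with get().
  return dev.get(static_cast<float*>(dev.allocate_temp(count * sizeof(float))));
}

template <typename Device>
void release_scratch(const Device &dev, float *p) {
  dev.deallocate_temp(p);
}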
|
|
||||||
|
|
||||||
@ -251,7 +256,10 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
|
|||||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
|
|
||||||
|
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
@ -271,12 +279,13 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
|
||||||
if (data) {
|
if (data) {
|
||||||
evalTo(data);
|
evalTo(data);
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
|
m_result = static_cast<EvaluatorPointerType>(m_device.get( (CoeffReturnType*)
|
||||||
|
m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType))));
|
||||||
evalTo(m_result);
|
evalTo(m_result);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -303,22 +312,25 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
|
|||||||
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
|
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; }
|
||||||
|
|
||||||
#ifdef EIGEN_USE_SYCL
|
#ifdef EIGEN_USE_SYCL
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_result.bind(cgh);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
|
EIGEN_DEVICE_FUNC void evalTo(EvaluatorPointerType data) {
|
||||||
TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions);
|
TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(m_device.get(data), m_dimensions);
|
||||||
m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
|
m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
|
||||||
}
|
}
|
||||||
|
|
||||||
Dimensions m_dimensions;
|
Dimensions m_dimensions;
|
||||||
const XprType m_op;
|
const XprType m_op;
|
||||||
const Device& m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
PointerT m_result;
|
EvaluatorPointerType m_result;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -39,6 +39,10 @@ struct DefaultDevice {
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
||||||
::memset(buffer, c, n);
|
::memset(buffer, c, n);
|
||||||
}
|
}
|
||||||
|
template<typename Type>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
|
||||||
#if !defined(EIGEN_GPU_COMPILE_PHASE)
|
#if !defined(EIGEN_GPU_COMPILE_PHASE)
|
||||||
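Note: DefaultDevice (and, further down, GpuDevice and ThreadPoolDevice) gains the same trivial get(). It lets generic evaluator code always write device.get(ptr), which is a no-op on these devices and a virtual-pointer-to-device-memory translation on the SYCL device. A tiny sketch of the call-site pattern:

// Generic code never branches on the device type; it just funnels every
// pointer through get() before dereferencing it or handing it to memcpy.
template <typename Device, typename Scalar>
Scalar first_element(const Device &dev, Scalar *ptr) {
  return dev.get(ptr)[0];  // identity for DefaultDevice / ThreadPoolDevice / GpuDevice
}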
@ -54,7 +58,7 @@ struct DefaultDevice {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
|
||||||
#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__)
|
#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
|
||||||
// Running on the host CPU
|
// Running on the host CPU
|
||||||
return l1CacheSize();
|
return l1CacheSize();
|
||||||
#elif defined(EIGEN_HIP_DEVICE_COMPILE)
|
#elif defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||||
@ -67,7 +71,7 @@ struct DefaultDevice {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
|
||||||
#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__)
|
#if !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
|
||||||
// Running single threaded on the host CPU
|
// Running single threaded on the host CPU
|
||||||
return l3CacheSize();
|
return l3CacheSize();
|
||||||
#elif defined(EIGEN_HIP_DEVICE_COMPILE)
|
#elif defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||||
|
@ -215,6 +215,10 @@ struct GpuDevice {
|
|||||||
stream_->deallocate(buffer);
|
stream_->deallocate(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename Type>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void* scratchpad() const {
|
EIGEN_STRONG_INLINE void* scratchpad() const {
|
||||||
return stream_->scratchpad();
|
return stream_->scratchpad();
|
||||||
|
File diff suppressed because it is too large
@ -76,6 +76,11 @@ struct ThreadPoolDevice {
|
|||||||
deallocate(buffer);
|
deallocate(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename Type>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Type get(Type data) const {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||||
#ifdef __ANDROID__
|
#ifdef __ANDROID__
|
||||||
::memcpy(dst, src, n);
|
::memcpy(dst, src, n);
|
||||||
|
@ -42,7 +42,6 @@ struct traits<TensorEvalToOp<XprType, MakePointer_> >
|
|||||||
// Intermediate typedef to workaround MSVC issue.
|
// Intermediate typedef to workaround MSVC issue.
|
||||||
typedef MakePointer_<T> MakePointerT;
|
typedef MakePointer_<T> MakePointerT;
|
||||||
typedef typename MakePointerT::Type Type;
|
typedef typename MakePointerT::Type Type;
|
||||||
typedef typename MakePointerT::RefType RefType;
|
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
@ -103,7 +102,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
|
|||||||
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
enum {
|
enum {
|
||||||
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
@ -115,22 +116,16 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
|
|||||||
};
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||||
: m_impl(op.expression(), device), m_device(device),
|
: m_impl(op.expression(), device), m_buffer(device.get(op.buffer())), m_expression(op.expression()){}
|
||||||
m_buffer(op.buffer()), m_op(op), m_expression(op.expression())
|
|
||||||
{ }
|
|
||||||
|
|
||||||
// Used for accessor extraction in SYCL Managed TensorMap:
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const {
|
|
||||||
return m_op;
|
|
||||||
}
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() {
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef typename internal::traits<const TensorEvalToOp<ArgType, MakePointer_> >::template MakePointer<CoeffReturnType>::Type DevicePointer;
|
|
||||||
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
|
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType scalar) {
|
||||||
EIGEN_UNUSED_VARIABLE(scalar);
|
EIGEN_UNUSED_VARIABLE(scalar);
|
||||||
eigen_assert(scalar == NULL);
|
eigen_assert(scalar == NULL);
|
||||||
return m_impl.evalSubExprsIfNeeded(m_buffer);
|
return m_impl.evalSubExprsIfNeeded(m_buffer);
|
||||||
@ -165,19 +160,20 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType, MakePointer_>, Device>
|
|||||||
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
|
TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_buffer; }
|
||||||
ArgType expression() const { return m_expression; }
|
ArgType expression() const { return m_expression; }
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_impl.bind(cgh);
|
||||||
|
m_buffer.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
|
|
||||||
/// added for sycl in order to construct the buffer from the sycl device
|
|
||||||
const Device& device() const{return m_device;}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TensorEvaluator<ArgType, Device> m_impl;
|
TensorEvaluator<ArgType, Device> m_impl;
|
||||||
const Device& m_device;
|
EvaluatorPointerType m_buffer;
|
||||||
DevicePointer m_buffer;
|
|
||||||
const XprType& m_op;
|
|
||||||
const ArgType m_expression;
|
const ArgType m_expression;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -34,6 +34,9 @@ struct TensorEvaluator
|
|||||||
typedef typename Derived::Dimensions Dimensions;
|
typedef typename Derived::Dimensions Dimensions;
|
||||||
typedef Derived XprType;
|
typedef Derived XprType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
|
typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
|
||||||
|
typedef StorageMemory<Scalar, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
// NumDimensions is -1 for variable dim tensors
|
// NumDimensions is -1 for variable dim tensors
|
||||||
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
|
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
|
||||||
@ -60,16 +63,17 @@ struct TensorEvaluator
|
|||||||
TensorBlockWriter;
|
TensorBlockWriter;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
|
||||||
: m_data(const_cast<typename internal::traits<Derived>::template MakePointer<Scalar>::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m)
|
: m_data(device.get((const_cast<TensorPointerType>(m.data())))),
|
||||||
|
m_dims(m.dimensions()),
|
||||||
|
m_device(device)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
// Used for accessor extraction in SYCL Managed TensorMap:
|
|
||||||
const Derived& derived() const { return m_impl; }
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
|
||||||
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
|
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && dest) {
|
||||||
m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize());
|
m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -78,14 +82,12 @@ struct TensorEvaluator
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||||
eigen_assert(m_data);
|
eigen_assert(m_data != NULL);
|
||||||
return m_data[index];
|
return m_data[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
|
||||||
typename internal::traits<Derived>::template MakePointer<Scalar>::RefType
|
eigen_assert(m_data != NULL);
|
||||||
coeffRef(Index index) {
|
|
||||||
eigen_assert(m_data);
|
|
||||||
return m_data[index];
|
return m_data[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,7 +116,7 @@ struct TensorEvaluator
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
|
||||||
eigen_assert(m_data);
|
eigen_assert(m_data != NULL);
|
||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
return m_data[m_dims.IndexOfColMajor(coords)];
|
return m_data[m_dims.IndexOfColMajor(coords)];
|
||||||
} else {
|
} else {
|
||||||
@ -122,10 +124,9 @@ struct TensorEvaluator
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
|
||||||
typename internal::traits<Derived>::template MakePointer<Scalar>::RefType
|
|
||||||
coeffRef(const array<DenseIndex, NumCoords>& coords) {
|
coeffRef(const array<DenseIndex, NumCoords>& coords) {
|
||||||
eigen_assert(m_data);
|
eigen_assert(m_data != NULL);
|
||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
return m_data[m_dims.IndexOfColMajor(coords)];
|
return m_data[m_dims.IndexOfColMajor(coords)];
|
||||||
} else {
|
} else {
|
||||||
@ -152,16 +153,18 @@ struct TensorEvaluator
|
|||||||
TensorBlockWriter::Run(block, m_data);
|
TensorBlockWriter::Run(block, m_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<Scalar>::Type data() const { return m_data; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
|
||||||
|
|
||||||
/// required by sycl in order to construct sycl buffer from raw pointer
|
|
||||||
const Device& device() const{return m_device;}
|
|
||||||
|
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_data.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
protected:
|
protected:
|
||||||
typename internal::traits<Derived>::template MakePointer<Scalar>::Type m_data;
|
EvaluatorPointerType m_data;
|
||||||
Dimensions m_dims;
|
Dimensions m_dims;
|
||||||
const Device& m_device;
|
const Device m_device;
|
||||||
const Derived& m_impl;
|
|
||||||
};
|
};
|
||||||
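Note: evalSubExprsIfNeeded above now funnels both ends of the copy through m_device.get() before calling memcpy, so the same line works whether m_data is a host pointer or a SYCL virtual pointer, and the device itself is stored by value for the same reason as the other members. The equivalent generic helper, as a sketch:

#include <cstddef>

// Both pointers are translated by the device before the raw copy; on host
// devices get() is the identity, on SyclDevice it resolves virtual pointers.
template <typename Device, typename T>
void copy_block(const Device &dev, T *dst, const T *src, std::size_t n) {
  dev.memcpy(static_cast<void*>(dev.get(dst)),
             dev.get(const_cast<T*>(src)),
             n * sizeof(T));
}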
|
|
||||||
namespace {
|
namespace {
|
||||||
@ -184,6 +187,13 @@ Eigen::half loadConstant(const Eigen::half* address) {
|
|||||||
return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
|
return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
// Overload of loadConstant for SYCL, implemented in terms of the range accessor.
|
||||||
|
template <cl::sycl::access::mode AcMd, typename T>
|
||||||
|
T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
|
||||||
|
return *address;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -197,7 +207,9 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
typedef typename Derived::Dimensions Dimensions;
|
typedef typename Derived::Dimensions Dimensions;
|
||||||
typedef const Derived XprType;
|
typedef const Derived XprType;
|
||||||
|
typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
|
||||||
|
typedef StorageMemory<const Scalar, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
// NumDimensions is -1 for variable dim tensors
|
// NumDimensions is -1 for variable dim tensors
|
||||||
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
|
static const int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
|
||||||
@ -221,18 +233,15 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
typename internal::remove_const<Scalar>::type, Index, NumCoords, Layout>
|
typename internal::remove_const<Scalar>::type, Index, NumCoords, Layout>
|
||||||
TensorBlockReader;
|
TensorBlockReader;
|
||||||
|
|
||||||
// Used for accessor extraction in SYCL Managed TensorMap:
|
|
||||||
const Derived& derived() const { return m_impl; }
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
|
||||||
: m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m)
|
: m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
|
||||||
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
|
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data) {
|
||||||
m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar));
|
m_device.memcpy((void*)(m_device.get(data)),m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -241,13 +250,8 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||||
eigen_assert(m_data);
|
eigen_assert(m_data != NULL);
|
||||||
#ifndef __SYCL_DEVICE_ONLY__
|
|
||||||
return loadConstant(m_data+index);
|
return loadConstant(m_data+index);
|
||||||
#else
|
|
||||||
CoeffReturnType tmp = m_data[index];
|
|
||||||
return tmp;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
@ -269,7 +273,7 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
|
||||||
eigen_assert(m_data);
|
eigen_assert(m_data != NULL);
|
||||||
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
|
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
|
||||||
: m_dims.IndexOfRowMajor(coords);
|
: m_dims.IndexOfRowMajor(coords);
|
||||||
return loadConstant(m_data+index);
|
return loadConstant(m_data+index);
|
||||||
@ -288,16 +292,17 @@ struct TensorEvaluator<const Derived, Device>
|
|||||||
TensorBlockReader::Run(block, m_data);
|
TensorBlockReader::Run(block, m_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::template MakePointer<const Scalar>::Type data() const { return m_data; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
/// added for sycl in order to construct the buffer from the sycl device
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
const Device& device() const{return m_device;}
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_data.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
protected:
|
protected:
|
||||||
typename internal::traits<Derived>::template MakePointer<const Scalar>::Type m_data;
|
EvaluatorPointerType m_data;
|
||||||
Dimensions m_dims;
|
Dimensions m_dims;
|
||||||
const Device& m_device;
|
const Device m_device;
|
||||||
const Derived& m_impl;
|
|
||||||
};
|
};
|
||||||
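Note: these two base evaluators (one for writable lvalues, one for const inputs) are the leaf nodes every tensor expression bottoms out in, so any device assignment ultimately reads and writes through the EvaluatorPointerType members introduced here. A small host-side usage example showing the public entry point that exercises them:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 1> a(8), b(8), out(8);
  a.setConstant(1.0f);
  b.setConstant(2.0f);
  Eigen::DefaultDevice dev;
  // The right-hand side is read through the const leaf evaluators and the
  // result is written through the writable one.
  out.device(dev) = a + b;
  return 0;
}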
|
|
||||||
|
|
||||||
@ -310,16 +315,6 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
|
|||||||
{
|
{
|
||||||
typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
|
typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;
|
||||||
|
|
||||||
enum {
|
|
||||||
IsAligned = true,
|
|
||||||
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess,
|
|
||||||
BlockAccess = false,
|
|
||||||
PreferBlockAccess = false,
|
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
|
||||||
CoordAccess = false, // to be implemented
|
|
||||||
RawAccess = false
|
|
||||||
};
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
TensorEvaluator(const XprType& op, const Device& device)
|
TensorEvaluator(const XprType& op, const Device& device)
|
||||||
: m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
|
: m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
|
||||||
@ -331,10 +326,26 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
|
|||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
IsAligned = true,
|
||||||
|
PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
&& (PacketType<CoeffReturnType, Device>::size >1)
|
||||||
|
#endif
|
||||||
|
,
|
||||||
|
BlockAccess = false,
|
||||||
|
PreferBlockAccess = false,
|
||||||
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
|
CoordAccess = false, // to be implemented
|
||||||
|
RawAccess = false
|
||||||
|
};
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
|
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||||
@ -354,13 +365,14 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
|
|||||||
PacketType<CoeffReturnType, Device>::size);
|
PacketType<CoeffReturnType, Device>::size);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
|
||||||
|
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
|
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
NullaryOp functor() const { return m_functor; }
|
|
||||||
|
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_argImpl.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const NullaryOp m_functor;
|
const NullaryOp m_functor;
|
||||||
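Note: the relocated enum adds a SYCL-only condition: packet access is only enabled when PacketType<CoeffReturnType, Device>::size is greater than one, because a "packet" of size one on the device would just duplicate the scalar path. A compile-time peek at the quantity being tested, as a host-side sketch:

#include <unsupported/Eigen/CXX11/Tensor>

// PacketType<T, Device>::size is the packet width the evaluator would use;
// when it degenerates to 1 the extra SYCL clause turns PacketAccess off.
constexpr int kFloatPacketSize =
    Eigen::PacketType<float, Eigen::DefaultDevice>::size;
static_assert(kFloatPacketSize >= 1, "a packet always holds at least one value");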
@ -401,14 +413,15 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
|
|||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
static const int NumDims = internal::array_size<Dimensions>::value;
|
static const int NumDims = internal::array_size<Dimensions>::value;
|
||||||
typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
|
typedef internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>
|
||||||
TensorBlock;
|
TensorBlock;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
|
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
|
||||||
m_argImpl.evalSubExprsIfNeeded(NULL);
|
m_argImpl.evalSubExprsIfNeeded(NULL);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -456,16 +469,18 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
|
|||||||
arg_block.data());
|
arg_block.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
|
||||||
|
|
||||||
/// required by sycl in order to extract the accessor
|
#ifdef EIGEN_USE_SYCL
|
||||||
const TensorEvaluator<ArgType, Device> & impl() const { return m_argImpl; }
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
/// added for sycl in order to construct the buffer from sycl device
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const{
|
||||||
UnaryOp functor() const { return m_functor; }
|
m_argImpl.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Device& m_device;
|
const Device m_device;
|
||||||
const UnaryOp m_functor;
|
const UnaryOp m_functor;
|
||||||
TensorEvaluator<ArgType, Device> m_argImpl;
|
TensorEvaluator<ArgType, Device> m_argImpl;
|
||||||
};
|
};
|
||||||
@ -509,6 +524,8 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
|
|||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
|
typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
static const int NumDims = internal::array_size<
|
static const int NumDims = internal::array_size<
|
||||||
typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
|
typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
|
||||||
@ -524,7 +541,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
|
|||||||
return m_leftImpl.dimensions();
|
return m_leftImpl.dimensions();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
|
||||||
m_leftImpl.evalSubExprsIfNeeded(NULL);
|
m_leftImpl.evalSubExprsIfNeeded(NULL);
|
||||||
m_rightImpl.evalSubExprsIfNeeded(NULL);
|
m_rightImpl.evalSubExprsIfNeeded(NULL);
|
||||||
return true;
|
return true;
|
||||||
@ -576,16 +593,17 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
|
|||||||
right_block.block_strides(), right_block.data());
|
right_block.block_strides(), right_block.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
const TensorEvaluator<LeftArgType, Device>& left_impl() const { return m_leftImpl; }
|
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
const TensorEvaluator<RightArgType, Device>& right_impl() const { return m_rightImpl; }
|
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
BinaryOp functor() const { return m_functor; }
|
|
||||||
|
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_leftImpl.bind(cgh);
|
||||||
|
m_rightImpl.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
private:
|
private:
|
||||||
const Device& m_device;
|
const Device m_device;
|
||||||
const BinaryOp m_functor;
|
const BinaryOp m_functor;
|
||||||
TensorEvaluator<LeftArgType, Device> m_leftImpl;
|
TensorEvaluator<LeftArgType, Device> m_leftImpl;
|
||||||
TensorEvaluator<RightArgType, Device> m_rightImpl;
|
TensorEvaluator<RightArgType, Device> m_rightImpl;
|
||||||
@ -639,6 +657,8 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
|
|||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
|
typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
|
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
|
||||||
{
|
{
|
||||||
@ -646,7 +666,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
|
|||||||
return m_arg1Impl.dimensions();
|
return m_arg1Impl.dimensions();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
|
||||||
m_arg1Impl.evalSubExprsIfNeeded(NULL);
|
m_arg1Impl.evalSubExprsIfNeeded(NULL);
|
||||||
m_arg2Impl.evalSubExprsIfNeeded(NULL);
|
m_arg2Impl.evalSubExprsIfNeeded(NULL);
|
||||||
m_arg3Impl.evalSubExprsIfNeeded(NULL);
|
m_arg3Impl.evalSubExprsIfNeeded(NULL);
|
||||||
@ -679,14 +699,16 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
|
|||||||
TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
|
TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
|
||||||
|
|
||||||
/// required by sycl in order to extract the accessor
|
#ifdef EIGEN_USE_SYCL
|
||||||
const TensorEvaluator<Arg1Type, Device> & arg1Impl() const { return m_arg1Impl; }
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
/// required by sycl in order to extract the accessor
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
const TensorEvaluator<Arg2Type, Device>& arg2Impl() const { return m_arg2Impl; }
|
m_arg1Impl.bind(cgh);
|
||||||
/// required by sycl in order to extract the accessor
|
m_arg2Impl.bind(cgh);
|
||||||
const TensorEvaluator<Arg3Type, Device>& arg3Impl() const { return m_arg3Impl; }
|
m_arg3Impl.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const TernaryOp m_functor;
|
const TernaryOp m_functor;
|
||||||
@ -731,6 +753,8 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
|
|||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
|
||||||
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
|
typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
|
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
|
||||||
{
|
{
|
||||||
@ -738,7 +762,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
|
|||||||
return m_condImpl.dimensions();
|
return m_condImpl.dimensions();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
|
||||||
m_condImpl.evalSubExprsIfNeeded(NULL);
|
m_condImpl.evalSubExprsIfNeeded(NULL);
|
||||||
m_thenImpl.evalSubExprsIfNeeded(NULL);
|
m_thenImpl.evalSubExprsIfNeeded(NULL);
|
||||||
m_elseImpl.evalSubExprsIfNeeded(NULL);
|
m_elseImpl.evalSubExprsIfNeeded(NULL);
|
||||||
@ -758,12 +782,14 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
|
|||||||
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
|
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
|
||||||
{
|
{
|
||||||
internal::Selector<PacketSize> select;
|
internal::Selector<PacketSize> select;
|
||||||
|
EIGEN_UNROLL_LOOP
|
||||||
for (Index i = 0; i < PacketSize; ++i) {
|
for (Index i = 0; i < PacketSize; ++i) {
|
||||||
select.select[i] = m_condImpl.coeff(index+i);
|
select.select[i] = m_condImpl.coeff(index+i);
|
||||||
}
|
}
|
||||||
return internal::pblend(select,
|
return internal::pblend(select,
|
||||||
m_thenImpl.template packet<LoadMode>(index),
|
m_thenImpl.template packet<LoadMode>(index),
|
||||||
m_elseImpl.template packet<LoadMode>(index));
|
m_elseImpl.template packet<LoadMode>(index));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
|
||||||
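Note: EIGEN_UNROLL_LOOP is the new macro, mentioned in the commit message, for controlling loop unrolling; it annotates small fixed-trip-count loops such as the per-lane selector fill above. A self-contained sketch with a hypothetical fallback definition (the real macro is compiler-dependent and may expand to an unroll pragma or to nothing):

#ifndef EIGEN_UNROLL_LOOP
#define EIGEN_UNROLL_LOOP  // fallback so this sketch stands alone
#endif

void fill_selector(bool (&select)[4], const bool *cond, int index) {
  EIGEN_UNROLL_LOOP
  for (int i = 0; i < 4; ++i) {
    select[i] = cond[index + i];  // mirrors the per-lane fill in packet() above
  }
}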
@ -773,14 +799,16 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
|
|||||||
.cwiseMax(m_elseImpl.costPerCoeff(vectorized));
|
.cwiseMax(m_elseImpl.costPerCoeff(vectorized));
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; }
|
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
const TensorEvaluator<ThenArgType, Device>& then_impl() const { return m_thenImpl; }
|
|
||||||
/// required by sycl in order to extract the accessor
|
|
||||||
const TensorEvaluator<ElseArgType, Device>& else_impl() const { return m_elseImpl; }
|
|
||||||
|
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_condImpl.bind(cgh);
|
||||||
|
m_thenImpl.bind(cgh);
|
||||||
|
m_elseImpl.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
private:
|
private:
|
||||||
TensorEvaluator<IfArgType, Device> m_condImpl;
|
TensorEvaluator<IfArgType, Device> m_condImpl;
|
||||||
TensorEvaluator<ThenArgType, Device> m_thenImpl;
|
TensorEvaluator<ThenArgType, Device> m_thenImpl;
|
||||||
|
@ -442,12 +442,133 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
|
|||||||
// SYCL Executor policy
|
// SYCL Executor policy
|
||||||
#ifdef EIGEN_USE_SYCL
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
|
||||||
template <typename Expression, bool Vectorizable>
|
template <bool Vectorizable, typename Evaluator>
|
||||||
class TensorExecutor<Expression, SyclDevice, Vectorizable> {
|
struct ExecExprFunctorKernel_impl {
|
||||||
|
typedef typename Evaluator::Index Index;
|
||||||
|
const Index range;
|
||||||
|
const Index vectorizable_threads;
|
||||||
|
Evaluator evaluator;
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ExecExprFunctorKernel_impl(
|
||||||
|
const Index range_, const Index vectorizable_threads_,
|
||||||
|
Evaluator evaluator_)
|
||||||
|
: range(range_), vectorizable_threads(vectorizable_threads_),
|
||||||
|
evaluator(evaluator_) {}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
|
||||||
|
operator()(cl::sycl::nd_item<1> itemID) {
|
||||||
|
Index gId = static_cast<Index>(itemID.get_global_linear_id());
|
||||||
|
Index total_threads = itemID.get_global_range(0);
|
||||||
|
EIGEN_UNROLL_LOOP
|
||||||
|
for (Index i = gId; i < range; i += total_threads) {
|
||||||
|
evaluator.evalScalar(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Evaluator>
|
||||||
|
struct ExecExprFunctorKernel_impl<true, Evaluator> {
|
||||||
|
typedef typename Evaluator::Index Index;
|
||||||
|
const Index range;
|
||||||
|
const Index vectorizable_threads;
|
||||||
|
Evaluator evaluator;
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ExecExprFunctorKernel_impl(
|
||||||
|
const Index range_, const Index vectorizable_threads_,
|
||||||
|
Evaluator evaluator_)
|
||||||
|
: range(range_), vectorizable_threads(vectorizable_threads_),
|
||||||
|
evaluator(evaluator_) {}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
|
||||||
|
operator()(cl::sycl::nd_item<1> itemID) {
|
||||||
|
Index gId = static_cast<Index>(itemID.get_global_linear_id());
|
||||||
|
if (gId < vectorizable_threads) {
|
||||||
|
const Index PacketSize = Eigen::internal::unpacket_traits<
|
||||||
|
typename Evaluator::PacketReturnType>::size;
|
||||||
|
evaluator.evalPacket(gId * PacketSize);
|
||||||
|
gId += (vectorizable_threads * PacketSize);
|
||||||
|
EIGEN_UNROLL_LOOP
|
||||||
|
for (Index i = gId; i < range; i += vectorizable_threads) {
|
||||||
|
evaluator.evalScalar(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
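Note: in the vectorizable specialization each work-item first evaluates one full packet at gId * PacketSize, then mops up the scalar tail that lies beyond vectorizable_threads * PacketSize with a stride of vectorizable_threads. The index arithmetic, isolated as a host-side sketch with evalPacket/evalScalar replaced by comments:

// Work-item g evaluates one packet at g * packet_size; the scalar tail is
// then shared out among the work-items with stride 'threads'.
void plan_work(long range, long threads, long packet_size, long g) {
  long id = g;
  if (id < threads) {
    long packet_start = id * packet_size;  // evaluator.evalPacket(packet_start)
    (void)packet_start;
    id += threads * packet_size;
    for (long i = id; i < range; i += threads) {
      // evaluator.evalScalar(i)
    }
  }
}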
|
|
||||||
|
template <typename Expr, bool NonZeroVectoriseSize, typename Evaluator>
|
||||||
|
struct ExecExprFunctorKernel
|
||||||
|
: ExecExprFunctorKernel_impl<
|
||||||
|
::Eigen::internal::IsVectorizable<Eigen::SyclDevice, Expr>::value,
|
||||||
|
Evaluator> {
|
||||||
|
ExecExprFunctorKernel(const Index range_, const Index vectorizable_threads_,
|
||||||
|
const Evaluator &evaluator)
|
||||||
|
: ExecExprFunctorKernel_impl<
|
||||||
|
::Eigen::internal::IsVectorizable<Eigen::SyclDevice, Expr>::value,
|
||||||
|
Evaluator>(range_, vectorizable_threads_, evaluator) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Expr, typename Evaluator>
|
||||||
|
struct ExecExprFunctorKernel<Expr, false, Evaluator>
|
||||||
|
: ExecExprFunctorKernel_impl<false, Evaluator> {
|
||||||
|
ExecExprFunctorKernel(const Index range_, const Index vectorizable_threads_,
|
||||||
|
const Evaluator &evaluator)
|
||||||
|
: ExecExprFunctorKernel_impl<false, Evaluator>(
|
||||||
|
range_, vectorizable_threads_, evaluator) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Expression, bool Vectorizable, bool Tileable>
|
||||||
|
class TensorExecutor<Expression, Eigen::SyclDevice, Vectorizable, Tileable> {
|
||||||
public:
|
public:
|
||||||
static EIGEN_STRONG_INLINE void run(const Expression &expr, const SyclDevice &device) {
|
typedef typename Expression::Index Index;
|
||||||
// call TensorSYCL module
|
static EIGEN_STRONG_INLINE void run(const Expression &expr, const Eigen::SyclDevice &dev) {
|
||||||
TensorSycl::run(expr, device);
|
Eigen::TensorEvaluator<Expression, Eigen::SyclDevice> evaluator(expr, dev);
|
||||||
|
const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
|
||||||
|
if (needs_assign) {
|
||||||
|
Index range, GRange, tileSize;
|
||||||
|
Index total_size = ::Eigen::internal::array_prod(evaluator.dimensions());
|
||||||
|
total_size = (total_size == 0) ? 1 : total_size;
|
||||||
|
const int PacketSize = Eigen::PacketType<
|
||||||
|
typename Eigen::TensorEvaluator<Expression, Eigen::SyclDevice>::CoeffReturnType,
|
||||||
|
Eigen::SyclDevice>::size;
|
||||||
|
Index vectorizable_threads =
|
||||||
|
static_cast<Index>(total_size / PacketSize);
|
||||||
|
dev.parallel_for_setup(vectorizable_threads, tileSize, range, GRange);
|
||||||
|
range = total_size;
|
||||||
|
auto f = [&](cl::sycl::handler &cgh) {
|
||||||
|
evaluator.bind(cgh);
|
||||||
|
typedef ExecExprFunctorKernel<Expression, true,
|
||||||
|
Eigen::TensorEvaluator<Expression, Eigen::SyclDevice>>
|
||||||
|
conditional_vectorized_kernel;
|
||||||
|
|
||||||
|
typedef ExecExprFunctorKernel<Expression, false,
|
||||||
|
Eigen::TensorEvaluator<Expression, Eigen::SyclDevice>>
|
||||||
|
non_vectorized_kernel;
|
||||||
|
// This is to make sure that an expression with a size less than vectorized size
|
||||||
|
// will not call the vectorized kernel.
|
||||||
|
// The reason for having this kernel is that the vectorisable parameter is a
|
||||||
|
// compile-time parameter,
|
||||||
|
// however, the size of a tensor is a run-time parameter
|
||||||
|
(vectorizable_threads)
|
||||||
|
? cgh.parallel_for(
|
||||||
|
#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS
|
||||||
|
dev.program().template get_kernel<conditional_vectorized_kernel>(),
|
||||||
|
#endif
|
||||||
|
cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange),
|
||||||
|
cl::sycl::range<1>(tileSize)),
|
||||||
|
conditional_vectorized_kernel(range, vectorizable_threads,
|
||||||
|
evaluator))
|
||||||
|
: cgh.parallel_for(
|
||||||
|
#ifdef EIGEN_SYCL_USE_PROGRAM_CLASS
|
||||||
|
dev.program().template get_kernel<non_vectorized_kernel>(),
|
||||||
|
#endif
|
||||||
|
cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange),
|
||||||
|
cl::sycl::range<1>(tileSize)),
|
||||||
|
non_vectorized_kernel(range, vectorizable_threads,
|
||||||
|
evaluator));
|
||||||
|
};
|
||||||
|
cl::sycl::event e;
|
||||||
|
EIGEN_SYCL_TRY_CATCH(e = dev.sycl_queue().submit(f));
|
||||||
|
dev.async_synchronize(e);
|
||||||
|
}
|
||||||
|
evaluator.cleanup();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
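Note: the new executor builds the whole launch inside one command group: bind the evaluator's placeholder accessors, then parallel_for over an nd_range sized by parallel_for_setup, picking the vectorized or scalar kernel at run time. A minimal, self-contained SYCL 1.2.1 example of that submit/parallel_for shape (kernel name and sizes are illustrative, not Eigen's):

#include <CL/sycl.hpp>
#include <vector>

int main() {
  std::vector<float> host(64, 1.0f);
  {
    cl::sycl::buffer<float, 1> buf(host.data(), cl::sycl::range<1>(host.size()));
    cl::sycl::queue q;
    q.submit([&](cl::sycl::handler &cgh) {
      auto acc = buf.get_access<cl::sycl::access::mode::read_write>(cgh);
      cgh.parallel_for<class scale_kernel>(
          cl::sycl::nd_range<1>(cl::sycl::range<1>(64), cl::sycl::range<1>(16)),
          [=](cl::sycl::nd_item<1> item) {
            acc[item.get_global_linear_id()] *= 2.0f;
          });
    });
    q.wait_and_throw();
  }
  return 0;
}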
|
|
||||||
|
@ -89,7 +89,10 @@ struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
|
|||||||
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
|
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
|
||||||
static const int NumDimensions = XprTraits::NumDimensions;
|
static const int NumDimensions = XprTraits::NumDimensions;
|
||||||
static const int Layout = XprTraits::Layout;
|
static const int Layout = XprTraits::Layout;
|
||||||
typedef typename TypeConversion<Scalar, typename XprTraits::PointerType>::type PointerType;
|
typedef typename TypeConversion<Scalar,
|
||||||
|
typename XprTraits::PointerType
|
||||||
|
>::type
|
||||||
|
PointerType;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename UnaryOp, typename XprType>
|
template<typename UnaryOp, typename XprType>
|
||||||
@ -166,7 +169,8 @@ struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
|
|||||||
typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
|
typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
|
||||||
typename traits<LhsXprType>::PointerType,
|
typename traits<LhsXprType>::PointerType,
|
||||||
typename traits<RhsXprType>::PointerType>::type
|
typename traits<RhsXprType>::PointerType>::type
|
||||||
>::type PointerType;
|
>::type
|
||||||
|
PointerType;
|
||||||
enum {
|
enum {
|
||||||
Flags = 0
|
Flags = 0
|
||||||
};
|
};
|
||||||
@ -247,7 +251,8 @@ struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprT
|
|||||||
typename conditional<Pointer_type_promotion<typename Arg2XprType::Scalar, Scalar>::val,
|
typename conditional<Pointer_type_promotion<typename Arg2XprType::Scalar, Scalar>::val,
|
||||||
typename traits<Arg2XprType>::PointerType,
|
typename traits<Arg2XprType>::PointerType,
|
||||||
typename traits<Arg3XprType>::PointerType>::type
|
typename traits<Arg3XprType>::PointerType>::type
|
||||||
>::type PointerType;
|
>::type
|
||||||
|
PointerType;
|
||||||
enum {
|
enum {
|
||||||
Flags = 0
|
Flags = 0
|
||||||
};
|
};
|
||||||
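Note: these trait hunks only re-wrap the PointerType computation over more lines, but the computation itself is worth spelling out: the result pointer type is chosen at compile time from the operands via Pointer_type_promotion and a conditional selection. A reduced sketch of that selection using the standard trait:

#include <type_traits>

// Stand-in for the trait logic above: pick the left operand's pointer type
// when the promotion predicate says it wins, otherwise the right one.
template <bool LhsWins, typename LhsPtr, typename RhsPtr>
using promoted_pointer_t = typename std::conditional<LhsWins, LhsPtr, RhsPtr>::type;

static_assert(std::is_same<promoted_pointer_t<true,  float*, const float*>, float*>::value, "");
static_assert(std::is_same<promoted_pointer_t<false, float*, const float*>, const float*>::value, "");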
|
@ -131,6 +131,8 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
  typedef OutputScalar CoeffReturnType;
  typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
@ -167,13 +169,13 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
    return m_dimensions;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (data) {
      evalToBuf(data);
      return false;
    } else {
      m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size);
      m_data = (EvaluatorPointerType)m_device.get((CoeffReturnType*)(m_device.allocate_temp(sizeof(CoeffReturnType) * m_size)));
      evalToBuf(m_data);
      return true;
    }
@ -202,11 +204,16 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 private:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(EvaluatorPointerType data) {
    const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
    ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);

@ -576,12 +583,12 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D

 protected:
  Index m_size;
  const FFT& m_fft;
  const FFT EIGEN_DEVICE_REF m_fft;
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  TensorEvaluator<ArgType, Device> m_impl;
  CoeffReturnType* m_data;
  EvaluatorPointerType m_data;
  const Device& m_device;
  const Device EIGEN_DEVICE_REF m_device;

  // This will support a maximum FFT size of 2^32 for each dimension
  // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2;
@ -78,9 +78,10 @@ class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOn
};


template<typename ArgType, typename Device>
template<typename ArgType_, typename Device>
struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
{
  typedef const typename internal::remove_all<ArgType_>::type ArgType;
  typedef TensorForcedEvalOp<ArgType> XprType;
  typedef typename ArgType::Scalar Scalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
@ -88,6 +89,9 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = true,
@ -106,8 +110,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
      TensorBlockReader;

  EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
  /// op_ is used for sycl
    : m_impl(op.expression(), device), m_op(op.expression()),
    : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL)
      m_device(device), m_buffer(NULL)
  { }

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
@ -115,17 +119,19 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
#if !defined(EIGEN_HIPCC)
  EIGEN_DEVICE_FUNC
#endif
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    const Index numValues = internal::array_prod(m_impl.dimensions());
    m_buffer = (CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType));
    m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)));
#ifndef EIGEN_USE_SYCL
    // Should initialize the memory in case we're dealing with non POD types.
    if (NumTraits<CoeffReturnType>::RequireInitialization) {
      for (Index i = 0; i < numValues; ++i) {
        new(m_buffer+i) CoeffReturnType();
      }
    }
#endif
    typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
    EvalTo evalToTmp(m_buffer, m_op);
    EvalTo evalToTmp(m_device.get(m_buffer), m_op);
    const bool Vectorize = internal::IsVectorizable<Device, const ArgType>::value;
    internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, Vectorize>::run(evalToTmp, m_device);
    return true;
@ -159,17 +165,20 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  typename Eigen::internal::traits<XprType>::PointerType data() const { return m_buffer; }
  EvaluatorPointerType data() const { return m_buffer; }

  /// required by sycl in order to extract the sycl accessor
#ifdef EIGEN_USE_SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() { return m_impl; }
  // binding placeholder accessors to a command group handler for SYCL
  /// used by sycl in order to build the sycl buffer
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const{return m_device;}
    m_buffer.bind(cgh);
    m_impl.bind(cgh);
  }
#endif
 private:
  TensorEvaluator<ArgType, Device> m_impl;
  const ArgType m_op;
  const Device& m_device;
  const Device m_device;
  CoeffReturnType* m_buffer;
  EvaluatorPointerType m_buffer;
};


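The new bind(cgh) member that replaces the old impl()/device() accessors exists because SYCL placeholder accessors are created without a handler and must be registered with the command group before the kernel that captures them is submitted. The struct below is a reduced, illustrative model of that idea (it is not Eigen's RangeAccess implementation), assuming the SYCL 1.2.1 placeholder-accessor API.

#include <CL/sycl.hpp>

template <typename T>
struct PlaceholderStorage {
  cl::sycl::accessor<T, 1, cl::sycl::access::mode::read_write,
                     cl::sycl::access::target::global_buffer,
                     cl::sycl::access::placeholder::true_t>
      acc;

  explicit PlaceholderStorage(cl::sycl::buffer<T, 1> &buf) : acc(buf) {}

  // Counterpart of the evaluator bind(cgh) calls above: without this require()
  // the placeholder accessor may not be used inside the submitted kernel.
  void bind(cl::sycl::handler &cgh) const { cgh.require(acc); }
};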
@ -20,17 +20,20 @@ namespace Eigen {
// map_allocator.
template<typename T> struct MakePointer {
  typedef T* Type;
  typedef T& RefType;
  typedef T ScalarType;
};

// The PointerType class is a container of the device specefic pointer
template <typename T>
// used for referring to a Pointer on TensorEvaluator class. While the TensorExpression
EIGEN_STRONG_INLINE T* constCast(const T* data) {
  return const_cast<T*>(data);
}

// The StorageMemory class is a container of the device specific pointer
// used for refering to a Pointer on TensorEvaluator class. While the TensorExpression
// is a device-agnostic type and need MakePointer class for type conversion,
// the TensorEvaluator calls can be specialized for a device, hence it is possible
// the TensorEvaluator class can be specialized for a device, hence it is possible
// to construct different types of temproray storage memory in TensorEvaluator
// for different devices by specializing the following PointerType class.
// for different devices by specializing the following StorageMemory class.
template<typename T, typename Device> struct PointerType : MakePointer<T>{};
template<typename T, typename device> struct StorageMemory: MakePointer <T> {};

namespace internal{
template<typename A, typename B> struct Pointer_type_promotion {
@ -39,25 +42,11 @@ template<typename A, typename B> struct Pointer_type_promotion {
template<typename A> struct Pointer_type_promotion<A, A> {
  static const bool val = true;
};
template<typename A, typename B> struct TypeConversion;
#ifndef __SYCL_DEVICE_ONLY__
template<typename A, typename B> struct TypeConversion {
  typedef A* type;
};
#endif
}

#if defined(EIGEN_USE_SYCL)
namespace TensorSycl {
namespace internal{
template <typename HostExpr, typename FunctorExpr, typename Tuple_of_Acc, typename Dims, typename Op, typename Index> class ReductionFunctor;
template<typename CoeffReturnType ,typename OutAccessor, typename HostExpr, typename FunctorExpr, typename Op, typename Dims, typename Index, typename TupleType>
class FullReductionKernelFunctor;
}
}
#endif

template<typename PlainObjectType, int Options_ = Unaligned, template <class> class MakePointer_ = MakePointer> class TensorMap;
template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType = DenseIndex> class Tensor;
@ -113,6 +102,31 @@ struct ThreadPoolDevice;
struct GpuDevice;
struct SyclDevice;

#ifdef EIGEN_USE_SYCL

template <typename T> struct MakeSYCLPointer {
  typedef Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T> Type;
};

template <typename T>
EIGEN_STRONG_INLINE const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>&
constCast(const Eigen::TensorSycl::internal::RangeAccess<cl::sycl::access::mode::read_write, T>& data) {
  return data;
}

template <typename T>
struct StorageMemory<T, SyclDevice> : MakeSYCLPointer<T> {};
template <typename T>
struct StorageMemory<T, const SyclDevice> : StorageMemory<T, SyclDevice> {};

namespace TensorSycl {
namespace internal{
template <typename Evaluator, typename Op> class ReductionFunctor;
}
}
#endif

enum FFTResultType {
  RealPart = 0,
  ImagPart = 1,
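The net effect of the declarations above is that StorageMemory<T, Device>::Type resolves to a raw T* for ordinary devices (via MakePointer) and to a RangeAccess placeholder accessor for SyclDevice, while constCast gives a uniform way to strip constness from either form. A reduced model of that dispatch follows; every name in it (CpuDeviceTag, FakeRangeAccess, StorageMemoryModel) is illustrative, not Eigen's.

#include <type_traits>

struct CpuDeviceTag {};
struct SyclDeviceTag {};

template <typename T>
struct FakeRangeAccess {};  // stands in for TensorSycl::internal::RangeAccess

// Default: a plain pointer, exactly what MakePointer<T> provides.
template <typename T, typename Device>
struct StorageMemoryModel { typedef T* Type; };

// SYCL specialisation: a device-side accessor wrapper instead of a pointer.
template <typename T>
struct StorageMemoryModel<T, SyclDeviceTag> { typedef FakeRangeAccess<T> Type; };

static_assert(std::is_same<StorageMemoryModel<float, CpuDeviceTag>::Type, float*>::value, "");
static_assert(std::is_same<StorageMemoryModel<float, SyclDeviceTag>::Type,
                           FakeRangeAccess<float> >::value, "");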
|
@ -421,6 +421,7 @@ class GaussianGenerator {
|
|||||||
const array<T, NumDims>& std_devs)
|
const array<T, NumDims>& std_devs)
|
||||||
: m_means(means)
|
: m_means(means)
|
||||||
{
|
{
|
||||||
|
EIGEN_UNROLL_LOOP
|
||||||
for (size_t i = 0; i < NumDims; ++i) {
|
for (size_t i = 0; i < NumDims; ++i) {
|
||||||
m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
|
m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
|
||||||
}
|
}
|
||||||
@ -428,6 +429,7 @@ class GaussianGenerator {
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const {
|
EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const {
|
||||||
T tmp = T(0);
|
T tmp = T(0);
|
||||||
|
EIGEN_UNROLL_LOOP
|
||||||
for (size_t i = 0; i < NumDims; ++i) {
|
for (size_t i = 0; i < NumDims; ++i) {
|
||||||
T offset = coordinates[i] - m_means[i];
|
T offset = coordinates[i] - m_means[i];
|
||||||
tmp += offset * offset / m_two_sigmas[i];
|
tmp += offset * offset / m_two_sigmas[i];
|
||||||
|
@ -88,6 +88,8 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
|||||||
typedef typename XprType::Scalar Scalar;
|
typedef typename XprType::Scalar Scalar;
|
||||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
|
||||||
@ -105,21 +107,20 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||||
: m_device(device), m_generator(op.generator())
|
: m_device(device), m_generator(op.generator())
|
||||||
#ifdef EIGEN_USE_SYCL
|
|
||||||
, m_argImpl(op.expression(), device)
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
|
TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
|
||||||
m_dimensions = argImpl.dimensions();
|
m_dimensions = argImpl.dimensions();
|
||||||
|
|
||||||
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
|
||||||
m_strides[0] = 1;
|
m_strides[0] = 1;
|
||||||
|
EIGEN_UNROLL_LOOP
|
||||||
for (int i = 1; i < NumDims; ++i) {
|
for (int i = 1; i < NumDims; ++i) {
|
||||||
m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
|
m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
|
||||||
if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
|
if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
m_strides[NumDims - 1] = 1;
|
m_strides[NumDims - 1] = 1;
|
||||||
|
EIGEN_UNROLL_LOOP
|
||||||
for (int i = NumDims - 2; i >= 0; --i) {
|
for (int i = NumDims - 2; i >= 0; --i) {
|
||||||
m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
|
m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
|
||||||
if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
|
if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
|
||||||
@ -129,7 +130,7 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
@ -234,11 +235,11 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
|||||||
TensorOpCost::MulCost<Scalar>());
|
TensorOpCost::MulCost<Scalar>());
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
|
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
|
||||||
|
|
||||||
#ifdef EIGEN_USE_SYCL
|
#ifdef EIGEN_USE_SYCL
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_argImpl; }
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Generator& functor() const { return m_generator; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler&) const {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -261,14 +262,11 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const Device& m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
Dimensions m_dimensions;
|
Dimensions m_dimensions;
|
||||||
array<Index, NumDims> m_strides;
|
array<Index, NumDims> m_strides;
|
||||||
array<IndexDivisor, NumDims> m_fast_strides;
|
array<IndexDivisor, NumDims> m_fast_strides;
|
||||||
Generator m_generator;
|
Generator m_generator;
|
||||||
#ifdef EIGEN_USE_SYCL
|
|
||||||
TensorEvaluator<ArgType, Device> m_argImpl;
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -154,23 +154,6 @@ class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprT
        m_padding_left(padding_left), m_padding_right(padding_right),
        m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}

#ifdef EIGEN_USE_SYCL // this is work around for sycl as Eigen could not use c++11 deligate constructor feature
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
                                                             DenseIndex row_strides, DenseIndex col_strides,
                                                             DenseIndex in_row_strides, DenseIndex in_col_strides,
                                                             DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
                                                             bool padding_explicit, DenseIndex padding_top, DenseIndex padding_bottom,
                                                             DenseIndex padding_left, DenseIndex padding_right, PaddingType padding_type,
                                                             Scalar padding_value)
        : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
          m_row_strides(row_strides), m_col_strides(col_strides),
          m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
          m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
          m_padding_explicit(padding_explicit), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
          m_padding_left(padding_left), m_padding_right(padding_right),
          m_padding_type(padding_type), m_padding_value(padding_value) {}

#endif

    EIGEN_DEVICE_FUNC
    DenseIndex patch_rows() const { return m_patch_rows; }
@ -242,6 +225,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
@ -256,15 +241,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
  typedef internal::TensorBlock<Scalar, Index, NumDims, Layout>
      OutputTensorBlock;

#ifdef __SYCL_DEVICE_ONLY__
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType op, const Device& device)
#else
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
#endif
      : m_device(device), m_impl(op.expression(), device)
#ifdef EIGEN_USE_SYCL
      , m_op(op)
#endif
  {
    EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE);

@ -410,7 +388,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
@ -516,13 +494,15 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
    return packetWithPossibleZero(index);
  }

  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

#ifdef EIGEN_USE_SYCL
  // Required by SYCL in order to construct the expression tree on the device
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& xpr() const { return m_op; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

  Index rowPaddingTop() const { return m_rowPaddingTop; }
@ -693,6 +673,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
@ -744,12 +725,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>

  Scalar m_paddingValue;

  const Device& m_device;
  const Device EIGEN_DEVICE_REF m_device;
  TensorEvaluator<ArgType, Device> m_impl;
#ifdef EIGEN_USE_SYCL
  // Required for SYCL in order to construct the expression tree on the device
  XprType m_op;
#endif
};


@ -353,6 +353,7 @@ namespace internal {
template<typename FirstType, typename... OtherTypes>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index array_prod(const IndexList<FirstType, OtherTypes...>& sizes) {
  Index result = 1;
  EIGEN_UNROLL_LOOP
  for (size_t i = 0; i < array_size<IndexList<FirstType, OtherTypes...> >::value; ++i) {
    result *= sizes[i];
  }
@ -86,6 +86,8 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
@ -131,7 +133,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
@ -146,6 +148,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
    eigen_assert(index < dimensions().TotalSize());
    *inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
@ -160,6 +163,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
      *inputIndex += index / m_strides[0];
      return true;
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        if (idx != idx / m_fastStrides[i] * m_strides[i]) {
@ -195,6 +199,7 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
@ -215,11 +220,13 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
                          compute_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Strides& functor() const { return m_strides; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
@ -37,7 +37,7 @@ namespace {
  {
#ifdef EIGEN_GPU_COMPILE_PHASE
    return __clz(val);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(SYCL_DEVICE_ONLY)
    return cl::sycl::clz(val);
#elif EIGEN_COMP_MSVC
    unsigned long index;
@ -55,8 +55,8 @@ namespace {
  {
#ifdef EIGEN_GPU_COMPILE_PHASE
    return __clzll(val);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(SYCL_DEVICE_ONLY)
    return cl::sycl::clz(val);
    return static_cast<int>(cl::sycl::clz(val));
#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
    unsigned long index;
    _BitScanReverse64(&index, val);
@ -92,7 +92,7 @@ namespace {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) {
#if defined(EIGEN_GPU_COMPILE_PHASE)
    return __umulhi(a, b);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(SYCL_DEVICE_ONLY)
    return cl::sycl::mul_hi(a, static_cast<uint32_t>(b));
#else
    return (static_cast<uint64_t>(a) * b) >> 32;
@ -103,7 +103,7 @@ namespace {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
#if defined(EIGEN_GPU_COMPILE_PHASE)
    return __umul64hi(a, b);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(SYCL_DEVICE_ONLY)
    return cl::sycl::mul_hi(a, static_cast<uint64_t>(b));
#elif defined(__SIZEOF_INT128__)
    __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
@ -124,7 +124,7 @@ namespace {
  template <typename T>
  struct DividerHelper<64, T> {
    static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
#if defined(__SIZEOF_INT128__) && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(__SYCL_DEVICE_ONLY__)
#if defined(__SIZEOF_INT128__) && !defined(EIGEN_GPU_COMPILE_PHASE) && !defined(SYCL_DEVICE_ONLY)
      return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
#else
      const uint64_t shift = 1ULL << log_div;
@ -205,8 +205,8 @@ class TensorIntDivisor<int32_t, true> {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const {
#ifdef EIGEN_GPU_COMPILE_PHASE
    return (__umulhi(magic, n) >> shift);
#elif defined(__SYCL_DEVICE_ONLY__)
#elif defined(SYCL_DEVICE_ONLY)
    return (cl::sycl::mul_hi(static_cast<uint64_t>(magic), static_cast<uint64_t>(n)) >> shift);
    return (cl::sycl::mul_hi(magic, static_cast<uint32_t>(n)) >> shift);
#else
    uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n);
    return (static_cast<uint32_t>(v >> 32) >> shift);
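The divide() path above replaces an integer division by a runtime-constant divisor with a multiply-high followed by a shift, using a precomputed "magic" multiplier. The snippet below is a self-contained check of that identity; it is a simplified variant of the technique for illustration only, not Eigen's exact magic-number derivation (Eigen folds the multiplier into a 32-bit constant).

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t d = 10;       // divisor fixed ahead of time
  const int log_div = 4;       // smallest p with 2^p >= d
  // magic = ceil(2^(32 + log_div) / d); kept in 64 bits here for simplicity
  const uint64_t magic = ((static_cast<uint64_t>(1) << (32 + log_div)) + d - 1) / d;
  for (uint32_t n = 0; n < 100000; ++n) {
    // multiply-high then right shift, the same shape as divide() above
    const uint32_t q = static_cast<uint32_t>((magic * n) >> 32) >> log_div;
    assert(q == n / d);
  }
  return 0;
}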
@ -134,13 +134,22 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
    }
  }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
@ -162,7 +171,9 @@ struct TensorEvaluator<const TensorLayoutSwapOp<ArgType>, Device>
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_impl.data(); }
  EIGEN_DEVICE_FUNC typename Storage::Type data() const {
    return constCast(m_impl.data());
  }

  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

@ -59,4 +59,39 @@
#define EIGEN_SLEEP(n) sleep(std::max<unsigned>(1, n/1000))
#endif

// Define a macro to use a reference on the host but a value on the device
#if defined(SYCL_DEVICE_ONLY)
  #define EIGEN_DEVICE_REF
#else
  #define EIGEN_DEVICE_REF &
#endif

// Define a macro for catching SYCL exceptions if exceptions are enabled
#if defined(EIGEN_EXCEPTIONS)
  #define EIGEN_SYCL_TRY_CATCH(X) \
    do { \
      try { X; } \
      catch(const cl::sycl::exception& e) { \
        std::cerr << "SYCL exception at " \
                  << __FILE__ << ":" << __LINE__ << std::endl \
                  << e.what() << std::endl; \
        std::rethrow_exception(std::current_exception()); \
      } \
    } while (false)
#else
  #define EIGEN_SYCL_TRY_CATCH(X) X
#endif

// Define a macro if local memory flags are unset or one of them is set
// Setting both flags is the same as unsetting them
#if (!defined(EIGEN_SYCL_LOCAL_MEM) && !defined(EIGEN_SYCL_NO_LOCAL_MEM)) || \
    (defined(EIGEN_SYCL_LOCAL_MEM) && defined(EIGEN_SYCL_NO_LOCAL_MEM))
  #define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON 1
  #define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_OFF 1
#elif defined(EIGEN_SYCL_LOCAL_MEM) && !defined(EIGEN_SYCL_NO_LOCAL_MEM)
  #define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_ON 1
#elif !defined(EIGEN_SYCL_LOCAL_MEM) && defined(EIGEN_SYCL_NO_LOCAL_MEM)
  #define EIGEN_SYCL_LOCAL_MEM_UNSET_OR_OFF 1
#endif

#endif
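The two macros above are used together throughout the evaluators in this change: EIGEN_DEVICE_REF turns `const Device EIGEN_DEVICE_REF m_device;` into a reference on the host and a plain value during SYCL device compilation, and EIGEN_SYCL_TRY_CATCH wraps queue submissions. A small usage sketch of the latter (the function name submit_noop is hypothetical, and the snippet assumes the macro and <CL/sycl.hpp> are in scope):

#include <CL/sycl.hpp>

cl::sycl::event submit_noop(cl::sycl::queue &queue) {
  cl::sycl::event e;
  // Expands to a try/catch that logs and rethrows when EIGEN_EXCEPTIONS is
  // defined, and to the bare statement otherwise.
  EIGEN_SYCL_TRY_CATCH(e = queue.submit([](cl::sycl::handler &cgh) {
    cgh.single_task<class noop_kernel>([]() {});
  }));
  return e;
}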
@ -31,7 +31,11 @@ template<typename PlainObjectType, int Options_, template <class> class MakePoin
  public:
    typedef TensorMap<PlainObjectType, Options_, MakePointer_> Self;
    typedef typename PlainObjectType::Base Base;
#ifdef EIGEN_USE_SYCL
    typedef typename Eigen::internal::remove_reference<typename Eigen::internal::nested<Self>::type>::type Nested;
#else
    typedef typename Eigen::internal::nested<Self>::type Nested;
#endif
    typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
    typedef typename internal::traits<PlainObjectType>::Index Index;
    typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
@ -85,9 +85,57 @@ struct PacketType<half, GpuDevice> {
#endif

#if defined(EIGEN_USE_SYCL)
template <typename T>
struct PacketType<T, SyclDevice> {
namespace TensorSycl {
  typedef T type;
namespace internal {

template <typename Index, Index A, Index B> struct PlusOp {
  static constexpr Index Value = A + B;
};

template <typename Index, Index A, Index B> struct DivOp {
  static constexpr Index Value = A / B;
};

template <typename Index, Index start, Index end, Index step,
          template <class Indx, Indx...> class StepOp>
struct static_for {
  template <typename UnaryOperator>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void loop(UnaryOperator op) {
    op(start);
    static_for<Index, StepOp<Index, start, step>::Value, end, step,
               StepOp>::loop(op);
  }
};
template <typename Index, Index end, Index step,
          template <class Indx, Indx...> class StepOp>
struct static_for<Index, end, end, step, StepOp> {
  template <typename UnaryOperator>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void loop(UnaryOperator) {}
};

template <typename OutScalar, typename Device, bool Vectorizable>
struct Vectorise {
  static const int PacketSize = 1;
  typedef OutScalar PacketReturnType;
};

template <typename OutScalar, typename Device>
struct Vectorise<OutScalar, Device, true> {
  static const int PacketSize = Eigen::PacketType<OutScalar, Device>::size;
  typedef typename Eigen::PacketType<OutScalar, Device>::type PacketReturnType;
};

static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index roundUp(Index x, Index y) {
  return ((((x) + (y)-1) / (y)) * (y));
}

} // namespace internal
} // namespace TensorSycl

template <>
struct PacketType<half, SyclDevice> {
  typedef half type;
  static const int size = 1;
  enum {
    HasAdd = 0,
@ -104,8 +152,58 @@ template <typename T>
    HasBlend = 0
  };
};
#endif
template <typename Scalar>
struct PacketType<Scalar, SyclDevice> : internal::default_packet_traits {
  typedef Scalar type;
  typedef Scalar half;
  enum {
    Vectorizable = 0,
    size = 1,
    AlignedOnScalar = 0,
    HasHalfPacket = 0
  };
  enum {
    HasAdd = 0,
    HasSub = 0,
    HasMul = 0,
    HasNegate = 0,
    HasAbs = 0,
    HasAbs2 = 0,
    HasMin = 0,
    HasMax = 0,
    HasConj = 0,
    HasSetLinear = 0
  };

};

template <typename Scalar>
struct PacketType<Scalar, const SyclDevice> : PacketType<Scalar, SyclDevice>{};

#ifndef EIGEN_DONT_VECTORIZE_SYCL
#define PACKET_TYPE(CVQual, Type, val, lengths, DEV)\
template<> struct PacketType<CVQual Type, DEV> : internal::sycl_packet_traits<val, lengths> \
{\
  typedef typename internal::packet_traits<Type>::type type;\
  typedef typename internal::packet_traits<Type>::half half;\
};


PACKET_TYPE(const, float, 1, 4, SyclDevice)
PACKET_TYPE(, float, 1, 4, SyclDevice)
PACKET_TYPE(const, float, 1, 4, const SyclDevice)
PACKET_TYPE(, float, 1, 4, const SyclDevice)

PACKET_TYPE(const, double, 0, 2, SyclDevice)
PACKET_TYPE(, double, 0, 2, SyclDevice)
PACKET_TYPE(const, double, 0, 2, const SyclDevice)
PACKET_TYPE(, double, 0, 2, const SyclDevice)
#undef PACKET_TYPE

template<> struct PacketType<half, const SyclDevice>: PacketType<half, SyclDevice>{};
template<> struct PacketType<const half, const SyclDevice>: PacketType<half, SyclDevice>{};
#endif
#endif

// Tuple mimics std::pair but works on e.g. nvcc.
template <typename U, typename V> struct Tuple {
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Tuple& operator= (const Tuple& rhs) {
|
Tuple& operator= (const Tuple& rhs) {
|
||||||
#ifndef __SYCL_DEVICE_ONLY__
|
#ifndef SYCL_DEVICE_ONLY
|
||||||
if (&rhs == this) return *this;
|
if (&rhs == this) return *this;
|
||||||
#endif
|
#endif
|
||||||
first = rhs.first;
|
first = rhs.first;
|
||||||
|
@ -37,7 +37,7 @@ struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprTyp
|
|||||||
template<typename NewDimensions, typename XprType>
|
template<typename NewDimensions, typename XprType>
|
||||||
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
|
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
|
||||||
{
|
{
|
||||||
typedef const TensorReshapingOp<NewDimensions, XprType>& type;
|
typedef const TensorReshapingOp<NewDimensions, XprType>EIGEN_DEVICE_REF type;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename NewDimensions, typename XprType>
|
template<typename NewDimensions, typename XprType>
|
||||||
@ -106,6 +106,9 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
|
|||||||
typedef typename XprType::Scalar Scalar;
|
typedef typename XprType::Scalar Scalar;
|
||||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
|
||||||
|
|
||||||
static const int NumOutputDims = internal::array_size<Dimensions>::value;
|
static const int NumOutputDims = internal::array_size<Dimensions>::value;
|
||||||
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
|
||||||
@ -168,7 +171,7 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
|
||||||
return m_impl.evalSubExprsIfNeeded(data);
|
return m_impl.evalSubExprsIfNeeded(data);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
|
||||||
@ -326,10 +329,18 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return const_cast<Scalar*>(m_impl.data()); }
|
EIGEN_DEVICE_FUNC typename Storage::Type data() const {
|
||||||
|
return constCast(m_impl.data());
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
|
EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
|
||||||
|
|
||||||
|
#ifdef EIGEN_USE_SYCL
|
||||||
|
// binding placeholder accessors to a command group handler for SYCL
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
|
||||||
|
m_impl.bind(cgh);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
protected:
|
protected:
|
||||||
TensorEvaluator<ArgType, Device> m_impl;
|
TensorEvaluator<ArgType, Device> m_impl;
|
||||||
NewDimensions m_dimensions;
|
NewDimensions m_dimensions;
|
||||||
@ -404,7 +415,7 @@ struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<Xp
|
|||||||
template<typename StartIndices, typename Sizes, typename XprType>
|
template<typename StartIndices, typename Sizes, typename XprType>
|
||||||
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
|
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
|
||||||
{
|
{
|
||||||
typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
|
typedef const TensorSlicingOp<StartIndices, Sizes, XprType>EIGEN_DEVICE_REF type;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename StartIndices, typename Sizes, typename XprType>
|
template<typename StartIndices, typename Sizes, typename XprType>
|
||||||
@ -488,7 +499,7 @@ template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
|
|||||||
// It is very expensive to start the memcpy kernel on GPU: we therefore only
|
// It is very expensive to start the memcpy kernel on GPU: we therefore only
|
||||||
// use it for large copies.
|
// use it for large copies.
|
||||||
#ifdef EIGEN_USE_SYCL
|
#ifdef EIGEN_USE_SYCL
|
||||||
template <typename Index> struct MemcpyTriggerForSlicing<Index, const Eigen::SyclDevice> {
|
template <typename Index> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice> {
|
||||||
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
|
EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
|
||||||
EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
|
EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
|
||||||
};
|
};
|
||||||
@ -508,6 +519,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
|
||||||
typedef Sizes Dimensions;
|
typedef Sizes Dimensions;
|
||||||
|
typedef StorageMemory<CoeffReturnType, Device> Storage;
|
||||||
|
typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
|
||||||
|
typedef typename Storage::Type EvaluatorPointerType;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
// Alignment can't be guaranteed at compile time since it depends on the
|
// Alignment can't be guaranteed at compile time since it depends on the
|
||||||
@ -575,7 +589,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||||
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
|
||||||
m_impl.evalSubExprsIfNeeded(NULL);
|
m_impl.evalSubExprsIfNeeded(NULL);
|
||||||
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
|
if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
|
||||||
&& data && m_impl.data()
|
&& data && m_impl.data()
|
||||||
@ -599,10 +613,10 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
|
|||||||
// Use memcpy if it's going to be faster than using the regular evaluation.
|
// Use memcpy if it's going to be faster than using the regular evaluation.
|
||||||
const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
|
const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
|
||||||
if (trigger(contiguous_values)) {
|
if (trigger(contiguous_values)) {
|
||||||
Scalar* src = (Scalar*)m_impl.data();
|
EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
|
||||||
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
|
for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
|
||||||
Index offset = srcCoeff(i);
|
Index offset = srcCoeff(i);
|
||||||
m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
|
m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar));
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -637,6 +651,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
     Index inputIndices[] = {0, 0};
     Index indices[] = {index, index + packetSize - 1};
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 1; i > 0; --i) {
         const Index idx0 = indices[0] / m_fastOutputStrides[i];
         const Index idx1 = indices[1] / m_fastOutputStrides[i];
@@ -648,6 +663,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
       inputIndices[0] += (indices[0] + m_offsets[0]);
       inputIndices[1] += (indices[1] + m_offsets[0]);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < NumDims - 1; ++i) {
         const Index idx0 = indices[0] / m_fastOutputStrides[i];
         const Index idx1 = indices[1] / m_fastOutputStrides[i];
@@ -667,6 +683,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
       values[0] = m_impl.coeff(inputIndices[0]);
       values[packetSize-1] = m_impl.coeff(inputIndices[1]);
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < packetSize-1; ++i) {
         values[i] = coeff(index+i);
       }
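
The hunks above wrap the fixed-trip-count index loops in `EIGEN_UNROLL_LOOP`; the macro itself is defined elsewhere in the patch, not in these hunks. As a rough sketch of the idea only (the macro name and conditions below are illustrative, not Eigen's actual definition), such a macro expands to an unroll pragma where the compiler understands one and to nothing otherwise, so the annotated loops stay portable:

```cpp
// Illustrative stand-in for an unroll-hint macro; not Eigen's EIGEN_UNROLL_LOOP.
#if defined(__clang__)
  #define MY_UNROLL_LOOP _Pragma("unroll")
#else
  #define MY_UNROLL_LOOP
#endif

template <int NumDims>
long flatten_index(const long (&idx)[NumDims], const long (&strides)[NumDims]) {
  long flat = 0;
  MY_UNROLL_LOOP               // trip count is a compile-time constant, so full unrolling is cheap
  for (int i = 0; i < NumDims; ++i) {
    flat += idx[i] * strides[i];
  }
  return flat;
}
```
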
@@ -698,8 +715,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
       m_impl.block(&input_block);
   }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
-    Scalar* result = const_cast<Scalar*>(m_impl.data());
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
+    typename Storage::Type result = constCast(m_impl.data());
     if (result) {
       Index offset = 0;
       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -733,19 +750,19 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
     }
     return NULL;
   }
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{
-    return m_impl;
-  }
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& startIndices() const{
-    return m_offsets;
-  }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif

  protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
   {
     Index inputIndex = 0;
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 1; i > 0; --i) {
         const Index idx = index / m_fastOutputStrides[i];
         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
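
Each evaluator gains the same `bind(cl::sycl::handler&)` member: it forwards the command-group handler to its child evaluators (and, where the node owns device memory, to that memory) so placeholder accessors are registered before the kernel is enqueued. A schematic, non-Eigen sketch of that recursive shape, with illustrative names:

```cpp
// Schematic only: Handler and Buffer stand in for cl::sycl::handler and for
// Eigen's StorageMemory type; none of this is the real Eigen/SYCL code.
struct Handler {};

struct Buffer {
  // A real buffer would call require() on its placeholder accessor here.
  void bind(Handler&) const {}
};

struct LeafEvaluator {
  Buffer data;
  void bind(Handler& cgh) const { data.bind(cgh); }
};

template <typename Child>
struct UnaryEvaluator {          // e.g. a slicing, padding, or patch node
  Child m_impl;
  void bind(Handler& cgh) const { m_impl.bind(cgh); }    // forwards, like m_impl.bind(cgh) above
};

template <typename Child>
struct ReductionLikeEvaluator {  // owns a result buffer as well as a child
  Child m_impl;
  Buffer m_result;
  void bind(Handler& cgh) const { m_impl.bind(cgh); m_result.bind(cgh); }
};
```
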
@@ -753,6 +770,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
       }
       inputIndex += (index + m_offsets[0]);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < NumDims - 1; ++i) {
         const Index idx = index / m_fastOutputStrides[i];
         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
@@ -767,7 +785,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
   array<Index, NumDims> m_inputStrides;
   TensorEvaluator<ArgType, Device> m_impl;
-  const Device& m_device;
+  const Device EIGEN_DEVICE_REF m_device;
   Dimensions m_dimensions;
   bool m_is_identity;
   const StartIndices m_offsets;
@@ -829,6 +847,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
     Index inputIndices[] = {0, 0};
     Index indices[] = {index, index + packetSize - 1};
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 1; i > 0; --i) {
         const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
         const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
@@ -840,6 +859,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
       inputIndices[0] += (indices[0] + this->m_offsets[0]);
       inputIndices[1] += (indices[1] + this->m_offsets[0]);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < NumDims - 1; ++i) {
         const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
         const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
@@ -859,6 +879,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
       internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
       this->m_impl.coeffRef(inputIndices[0]) = values[0];
       this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < packetSize-1; ++i) {
         this->coeffRef(index+i) = values[i];
       }
@@ -892,7 +913,7 @@ struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprTyp
 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
 struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
 {
-  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
+  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>EIGEN_DEVICE_REF type;
 };

 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
@@ -969,6 +990,8 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
   typedef Strides Dimensions;

   enum {
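
The `Storage`/`EvaluatorPointerType` typedefs added here are how the patch abstracts the pointer type an evaluator exposes: on ordinary devices it can stay a plain `CoeffReturnType*`, while under SYCL it can be a device-memory wrapper instead. A hedged sketch of such a selection trait (the names below are illustrative, not Eigen's StorageMemory):

```cpp
// Illustrative trait only; Eigen's real StorageMemory and SYCL pointer types differ.
template <typename T>
struct DeviceMemoryHandle {      // stand-in for a SYCL device-memory wrapper
  T* raw;
  T& operator[](long i) const { return raw[i]; }
};

struct PlainDevice     { static const bool uses_device_memory = false; };
struct SyclStyleDevice { static const bool uses_device_memory = true;  };

template <typename T, typename Device, bool UseHandle = Device::uses_device_memory>
struct StorageFor {              // default: a raw pointer is enough
  typedef T* Type;
};

template <typename T, typename Device>
struct StorageFor<T, Device, true> {   // SYCL-style: carry device memory instead
  typedef DeviceMemoryHandle<T> Type;
};

// Inside an evaluator one would then write, analogously to the patch:
//   typedef StorageFor<CoeffReturnType, Device> Storage;
//   typedef typename Storage::Type EvaluatorPointerType;
```
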
@@ -985,8 +1008,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
     : m_impl(op.expression(), device),
       m_device(device),
-      m_strides(op.strides()), m_exprStartIndices(op.startIndices()),
-      m_exprStopIndices(op.stopIndices())
+      m_strides(op.strides())
   {
     // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
     DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
@@ -1069,7 +1091,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -1091,30 +1113,28 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
   }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
     return NULL;
   }
-  //use by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStartIndices() const { return m_exprStartIndices; }
-  //use by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& exprStopIndices() const { return m_exprStopIndices; }
-  //use by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& strides() const { return m_strides; }
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{return m_impl;}
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif

  protected:
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
   {
     Index inputIndex = 0;
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 1; i >= 0; --i) {
         const Index idx = index / m_fastOutputStrides[i];
         inputIndex += idx * m_inputStrides[i] + m_offsets[i];
         index -= idx * m_outputStrides[i];
       }
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < NumDims; ++i) {
         const Index idx = index / m_fastOutputStrides[i];
         inputIndex += idx * m_inputStrides[i] + m_offsets[i];
@@ -1125,7 +1145,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   }

   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
-#ifndef __SYCL_DEVICE_ONLY__
+#ifndef SYCL_DEVICE_ONLY
     return numext::maxi(min, numext::mini(max,value));
 #else
     return cl::sycl::clamp(value, min, max);
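
The `clamp` helper now keys the host/device split off the `SYCL_DEVICE_ONLY` macro and defers to `cl::sycl::clamp` when compiling for the device. The same split, shown standalone (the surrounding helper is illustrative; only the macro test and the SYCL call mirror the patch):

```cpp
#include <algorithm>

template <typename Index>
inline Index clamp_index(Index value, Index lo, Index hi) {
#ifndef SYCL_DEVICE_ONLY
  return std::max(lo, std::min(hi, value));   // host path
#else
  return cl::sycl::clamp(value, lo, hi);      // device path, seen only by the SYCL device compiler
#endif
}
```
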
@@ -1137,15 +1157,11 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   array<Index, NumDims> m_inputStrides;
   bool m_is_identity;
   TensorEvaluator<ArgType, Device> m_impl;
-  const Device& m_device;
+  const Device EIGEN_DEVICE_REF m_device;
   DSizes<Index, NumDims> m_startIndices; // clamped startIndices
   DSizes<Index, NumDims> m_dimensions;
   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
   const Strides m_strides;
-  //use by sycl
-  const StartIndices m_exprStartIndices;
-  //use by sycl
-  const StopIndices m_exprStopIndices;
 };

 // Eval as lvalue
@@ -92,6 +92,8 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;

   enum {
     IsAligned = true,
@@ -138,7 +140,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -151,6 +153,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
     eigen_assert(index < dimensions().TotalSize());
     Index inputIndex = 0;
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 1; i > 0; --i) {
         const Index idx = index / m_outputStrides[i];
         if (isPaddingAtIndexForDim(idx, i)) {
@@ -164,6 +167,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
       }
       inputIndex += (index - m_padding[0].first);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < NumDims - 1; ++i) {
         const Index idx = index / m_outputStrides[i+1];
         if (isPaddingAtIndexForDim(idx, i)) {
@@ -192,23 +196,25 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
     TensorOpCost cost = m_impl.costPerCoeff(vectorized);
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < NumDims; ++i)
         updateCostPerDimension(cost, i, i == 0);
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 1; i >= 0; --i)
         updateCostPerDimension(cost, i, i == NumDims - 1);
     }
     return cost;
   }

-  EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PaddingDimensions& padding() const { return m_padding; }
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& padding_value() const { return m_paddingValue; }
-  /// used by sycl
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const{return m_impl;}
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
+#endif

  private:
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
@@ -272,6 +278,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device

     const Index initialIndex = index;
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = NumDims - 1; i > 0; --i) {
       const Index firstIdx = index;
       const Index lastIdx = index + PacketSize - 1;
@@ -329,7 +336,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device

     const Index initialIndex = index;
     Index inputIndex = 0;
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < NumDims - 1; ++i) {
       const Index firstIdx = index;
       const Index lastIdx = index + PacketSize - 1;
@@ -383,6 +390,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
   {
     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+    EIGEN_UNROLL_LOOP
     for (int i = 0; i < PacketSize; ++i) {
       values[i] = coeff(index+i);
     }
@@ -89,6 +89,8 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;


   enum {
@@ -103,9 +105,6 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
       : m_impl(op.expression(), device)
-#ifdef EIGEN_USE_SYCL
-      , m_patch_dims(op.patch_dims())
-#endif
   {
     Index num_patches = 1;
     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
@@ -149,7 +148,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
     m_impl.evalSubExprsIfNeeded(NULL);
     return true;
   }
@@ -167,6 +166,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
     Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index];
     Index inputIndex = 0;
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 2; i > 0; --i) {
         const Index patchIdx = patchIndex / m_patchStrides[i];
         patchIndex -= patchIdx * m_patchStrides[i];
@@ -175,6 +175,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
         inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
       }
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < NumDims - 2; ++i) {
         const Index patchIdx = patchIndex / m_patchStrides[i];
         patchIndex -= patchIdx * m_patchStrides[i];
@@ -202,6 +203,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>

     Index inputIndices[2] = {0, 0};
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      EIGEN_UNROLL_LOOP
       for (int i = NumDims - 2; i > 0; --i) {
         const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
                                    patchIndices[1] / m_patchStrides[i]};
@@ -217,6 +219,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
         inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i];
       }
     } else {
+      EIGEN_UNROLL_LOOP
       for (int i = 0; i < NumDims - 2; ++i) {
         const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
                                    patchIndices[1] / m_patchStrides[i]};
@@ -243,6 +246,7 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
       EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
       values[0] = m_impl.coeff(inputIndices[0]);
       values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
+      EIGEN_UNROLL_LOOP
       for (int i = 1; i < PacketSize-1; ++i) {
         values[i] = coeff(index+i);
       }
@@ -259,11 +263,13 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
            TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
   }

-  EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 #ifdef EIGEN_USE_SYCL
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PatchDim& functor() const { return m_patch_dims; }
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+  }
 #endif

  protected:
@@ -274,9 +280,6 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>

   TensorEvaluator<ArgType, Device> m_impl;

-#ifdef EIGEN_USE_SYCL
-  const PatchDim m_patch_dims;
-#endif
 };

 } // end namespace Eigen
@@ -2,6 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+// Copyright (C) 2018 Mehdi Goli <eigen@codeplay.com> Codeplay Software Ltd.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -44,6 +45,7 @@ EIGEN_DEVICE_FUNC uint64_t get_random_seed() {
   uint64_t rnd = ::random() ^ mach_absolute_time();
   return rnd;

+
 #else
   // Augment the current time with pseudo random number generation
   // to ensure that we get different seeds if we try to generate seeds
@@ -147,14 +149,41 @@ template <typename T> class UniformRandomGenerator {
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
       uint64_t seed = 0) {
     m_state = PCG_XSH_RS_state(seed);
+#ifdef EIGEN_USE_SYCL
+    // In SYCL it is not possible to build the PCG_XSH_RS_state in one step, so
+    // m_state is initialized in two stages. The constructor of the functor is
+    // called on the CPU, which is where the clock seed is obtained; that seed
+    // is the same for every thread because, unlike CUDA, the thread id, block
+    // id, etc. are not global functions and are only available inside
+    // operator() (which runs on the device). For CUDA,
+    // (((CLOCK + global_thread_id) * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL)
+    // is passed to each thread, whereas for SYCL
+    // ((CLOCK * 6364136223846793005ULL) + 0xda3e39cb94b95bdbULL) is passed to
+    // each thread, and each thread adds (global_thread_id * 6364136223846793005ULL)
+    // for itself exactly once to complete the construction, mirroring CUDA.
+    // The thread id injection therefore happens later: when operator() is
+    // called the thread id is available, so the thread id, block id, ... (the
+    // equivalent of i) are added to the seed there to build a unique m_state
+    // per thread, as in CUDA.
+    m_exec_once = false;
+#endif
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator(
       const UniformRandomGenerator& other) {
     m_state = other.m_state;
+#ifdef EIGEN_USE_SYCL
+    m_exec_once = other.m_exec_once;
+#endif
   }

   template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   T operator()(Index i) const {
+#ifdef EIGEN_USE_SYCL
+    if (!m_exec_once) {
+      // This is the second stage of adding the thread id to the CPU clock seed
+      // to build a unique seed per thread. The (i * 6364136223846793005ULL)
+      // term is the remaining part of the PCG_XSH_RS_state on the GPU side.
+      m_state += (i * 6364136223846793005ULL);
+      m_exec_once = true;
+    }
+#endif
     T result = RandomToTypeUniform<T>(&m_state, i);
     return result;
   }
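
The comment block above describes a two-stage construction of the PCG state: the host-side constructor can only fold in the clock, and the per-thread term `i * 6364136223846793005ULL` is added lazily, exactly once, on the device. The same logic in isolation, as a simplified sketch (this is not Eigen's generator; only the constants quoted in the comment are reused):

```cpp
#include <cstdint>

class TwoStageSeededState {
 public:
  // Stage 1 (host): only the clock seed is known and it is identical for
  // every thread, mirroring what the SYCL constructor can do.
  explicit TwoStageSeededState(std::uint64_t clock_seed)
      : state_(clock_seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL),
        thread_folded_(false) {}

  std::uint64_t next(std::uint64_t thread_id) {
    if (!thread_folded_) {
      // Stage 2 (device): fold the per-thread id in exactly once, mirroring
      // "m_state += i * 6364136223846793005ULL" guarded by m_exec_once.
      state_ += thread_id * 6364136223846793005ULL;
      thread_folded_ = true;
    }
    // Advance the state; a plain LCG step stands in for the real PCG step.
    state_ = state_ * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
    return state_ >> 22;
  }

 private:
  std::uint64_t state_;
  bool thread_folded_;
};
```
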
@@ -163,6 +192,14 @@ template <typename T> class UniformRandomGenerator {
   Packet packetOp(Index i) const {
     const int packetSize = internal::unpacket_traits<Packet>::size;
     EIGEN_ALIGN_MAX T values[packetSize];
+#ifdef EIGEN_USE_SYCL
+    if (!m_exec_once) {
+      // This is the second stage of adding the thread id to the CPU clock seed
+      // to build a unique seed per thread.
+      m_state += (i * 6364136223846793005ULL);
+      m_exec_once = true;
+    }
+#endif
+    EIGEN_UNROLL_LOOP
     for (int j = 0; j < packetSize; ++j) {
       values[j] = RandomToTypeUniform<T>(&m_state, i);
     }
@@ -171,6 +208,9 @@ template <typename T> class UniformRandomGenerator {

  private:
   mutable uint64_t m_state;
+#ifdef EIGEN_USE_SYCL
+  mutable bool m_exec_once;
+#endif
 };

 template <typename Scalar>
@@ -222,14 +262,37 @@ template <typename T> class NormalRandomGenerator {
   // Uses the given "seed" if non-zero, otherwise uses a random seed.
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) {
     m_state = PCG_XSH_RS_state(seed);
+#ifdef EIGEN_USE_SYCL
+    // In SYCL it is not possible to build the PCG_XSH_RS_state in one step, so
+    // m_state is initialized in two stages. The constructor of the functor is
+    // called on the CPU, which is where the clock seed is obtained; that seed
+    // is the same for every thread because, unlike CUDA, the thread id, block
+    // id, etc. are not global functions and are only available inside
+    // operator() (which runs on the device). The thread id injection therefore
+    // happens later: when operator() is called the thread id is available, so
+    // the thread id, block id, ... (the equivalent of i) are added to the seed
+    // there to build a unique m_state per thread, as in CUDA.
+    m_exec_once = false;
+#endif
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(
       const NormalRandomGenerator& other) {
     m_state = other.m_state;
+#ifdef EIGEN_USE_SYCL
+    m_exec_once = other.m_exec_once;
+#endif
   }

   template<typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   T operator()(Index i) const {
+#ifdef EIGEN_USE_SYCL
+    if (!m_exec_once) {
+      // This is the second stage of adding the thread id to the CPU clock seed
+      // to build a unique seed per thread.
+      m_state += (i * 6364136223846793005ULL);
+      m_exec_once = true;
+    }
+#endif
     T result = RandomToTypeNormal<T>(&m_state, i);
     return result;
   }
@@ -238,6 +301,14 @@ template <typename T> class NormalRandomGenerator {
   Packet packetOp(Index i) const {
     const int packetSize = internal::unpacket_traits<Packet>::size;
     EIGEN_ALIGN_MAX T values[packetSize];
+#ifdef EIGEN_USE_SYCL
+    if (!m_exec_once) {
+      // This is the second stage of adding the thread id to the CPU clock seed
+      // to build a unique seed per thread.
+      m_state += (i * 6364136223846793005ULL);
+      m_exec_once = true;
+    }
+#endif
+    EIGEN_UNROLL_LOOP
     for (int j = 0; j < packetSize; ++j) {
       values[j] = RandomToTypeNormal<T>(&m_state, i);
     }
@@ -246,6 +317,9 @@ template <typename T> class NormalRandomGenerator {

  private:
   mutable uint64_t m_state;
+#ifdef EIGEN_USE_SYCL
+  mutable bool m_exec_once;
+#endif
 };


@@ -299,7 +299,7 @@ template <typename Self, typename Op, typename Device, bool Vectorizable = (Self
 struct FullReducer {
   static const bool HasOptimizedImplementation = false;

-  static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) {
+  static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::EvaluatorPointerType output) {
     const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions());
     *output = InnerMostDimReducer<Self, Op, Vectorizable>::reduce(self, 0, num_coeffs, reducer);
   }
@@ -400,6 +400,18 @@ struct OuterReducer {
   }
 };

+#ifdef EIGEN_USE_SYCL
+// Default Generic reducer
+template <typename Self, typename Op, typename Device>
+struct GenericReducer {
+  static const bool HasOptimizedImplementation = false;
+
+  EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
+    eigen_assert(false && "Not implemented");
+    return true;
+  }
+};
+#endif

 #if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
 template <int B, int N, typename S, typename R, typename I_>
@@ -423,6 +435,23 @@ template <int NPT, typename S, typename R, typename I_>
 __global__ void OuterReductionKernel(R, const S, I_, I_, typename S::CoeffReturnType*);
 #endif

+/**
+ * For SYCL, the return type of the reduction is deduced from the initialize method of the given Op.
+ * This allows the reduction to have a different type for the accumulator than the input data type.
+ * If this is the case, the functor needs to have two reduce methods: one for reducing an element of
+ * the input with the accumulator, and the other for reducing two accumulators.
+ * Such a reducer can be useful, for instance, when the accumulator is a boolean or a bitset that
+ * checks for some properties of the input.
+ */
+template <typename Op, typename CoeffReturnType>
+struct ReductionReturnType {
+#if EIGEN_HAS_CXX11 && defined(EIGEN_USE_SYCL)
+  typedef typename remove_const<decltype(std::declval<Op>().initialize())>::type type;
+#else
+  typedef typename remove_const<CoeffReturnType>::type type;
+#endif
+};

 template <typename Self, typename Op,
           bool Vectorizable =
               (Self::InputPacketAccess & Self::ReducerTraits::PacketAccess)>
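
`ReductionReturnType` lets the accumulator type differ from the input scalar by deducing it from the reducer's `initialize()`. A hedged example of the kind of functor this enables, written to the two-overload shape the comment describes (hypothetical, not taken from Eigen, and omitting the packet-related members a full Eigen reducer also provides):

```cpp
#include <cmath>

// Hypothetical reducer whose inputs are float but whose accumulator is bool:
// "are all coefficients finite?".
struct AllFiniteReducer {
  // Under SYCL the accumulator type is deduced from initialize().
  bool initialize() const { return true; }

  // Reduce one input element into the accumulator...
  void reduce(float x, bool* accum) const { *accum = *accum && std::isfinite(x); }
  // ...and reduce two accumulators together.
  void reduce(bool partial, bool* accum) const { *accum = *accum && partial; }

  bool finalize(bool accum) const { return accum; }
};
```
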
@@ -520,12 +549,15 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType,
     const Op m_reducer;
 };

+template<typename ArgType, typename Device>
+struct TensorReductionEvaluatorBase;
+
 // Eval as rvalue
 template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
-struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
+struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
 {
   typedef internal::reducer_traits<Op, Device> ReducerTraits;
+  typedef Dims ReducedDims;
   typedef TensorReductionOp<Op, Dims, ArgType, MakePointer_> XprType;
   typedef typename XprType::Index Index;
   typedef ArgType ChildType;
@@ -535,12 +567,20 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
   static const int NumOutputDims = NumInputDims - NumReducedDims;
   typedef typename internal::conditional<NumOutputDims==0, Sizes<>, DSizes<Index, NumOutputDims> >::type Dimensions;
   typedef typename XprType::Scalar Scalar;
-  typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self;
+  typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Self;
   static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
-  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
+  typedef typename internal::ReductionReturnType<Op, typename XprType::CoeffReturnType>::type CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const Index PacketSize = PacketType<CoeffReturnType, Device>::size;

+  typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
+  typedef StorageMemory<CoeffReturnType, Device> Storage;
+  typedef typename Storage::Type EvaluatorPointerType;
+
+  // Subset of strides of the input tensor for the non-reduced dimensions.
+  // Indexed by output dimensions.
+  static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
+
   enum {
     IsAligned = false,
     PacketAccess = Self::InputPacketAccess && ReducerTraits::PacketAccess,
@@ -562,11 +602,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
   static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
   static const bool RunningFullReduction = (NumOutputDims==0);

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReductionEvaluatorBase(const XprType& op, const Device& device)
     : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
-#if defined(EIGEN_USE_SYCL)
-    , m_xpr_dims(op.dims())
-#endif
   {
     EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE);
     EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
@@ -653,7 +690,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
   // of which will eventually result in an NVCC error
   EIGEN_DEVICE_FUNC
 #endif
-  bool evalSubExprsIfNeeded(typename MakePointer_<CoeffReturnType>::Type data) {
+  bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
     m_impl.evalSubExprsIfNeeded(NULL);

     // Use the FullReducer if possible.
@@ -663,7 +700,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
                                             !RunningOnGPU))) {
       bool need_assign = false;
       if (!data) {
-        m_result = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType)));
+        m_result = static_cast<EvaluatorPointerType>(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType))));
         data = m_result;
         need_assign = true;
       }
@@ -671,20 +708,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
       internal::FullReducer<Self, Op, Device>::run(*this, reducer, m_device, data);
       return need_assign;
     }
-    else if(RunningOnSycl){
-      const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
-      const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
-      if (!data) {
-        data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
-        m_result = data;
-      }
-      Op reducer(m_reducer);
-      internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
-      return (m_result != NULL);
-    }

     // Attempt to use an optimized reduction.
-    else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) {
+    else if ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || (RunningOnSycl)) {
       bool reducing_inner_dims = true;
       for (int i = 0; i < NumReducedDims; ++i) {
         if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -698,8 +724,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
         const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
         const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
         if (!data) {
-          if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
-            data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+          if ((num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) || (RunningOnSycl)) {
+            data = static_cast<EvaluatorPointerType>(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)));
             m_result = data;
           }
           else {
@@ -707,6 +733,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
           }
         }
         Op reducer(m_reducer);
+        // For SYCL this if always returns false
         if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
           if (m_result) {
             m_device.deallocate_temp(m_result);
@@ -731,8 +758,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
         const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
         const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
         if (!data) {
-          if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
-            data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+          if ((num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) || (RunningOnSycl)) {
+            data = static_cast<EvaluatorPointerType>(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)));
             m_result = data;
           }
           else {
@@ -740,6 +767,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
           }
         }
         Op reducer(m_reducer);
+        // For SYCL this if always returns false
        if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
          if (m_result) {
            m_device.deallocate_temp(m_result);
@@ -750,6 +778,21 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
           return (m_result != NULL);
         }
       }
+#if defined(EIGEN_USE_SYCL)
+      // If there is no optimised SYCL version, the reduction expression must be
+      // broken into two subexpressions and the SYCL generic reducer is used on
+      // the device.
+      if(RunningOnSycl) {
+        const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
+        const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
+        if (!data) {
+          data = static_cast<EvaluatorPointerType>(m_device.get((CoeffReturnType*)m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)));
+          m_result = data;
+        }
+        Op reducer(m_reducer);
+        internal::GenericReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
+        return (m_result != NULL);
+      }
+#endif
     }
     return true;
   }
@@ -764,7 +807,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
   {
-    if ((RunningOnSycl || RunningFullReduction || RunningOnGPU) && m_result) {
+    if ((RunningFullReduction || RunningOnGPU) && m_result) {
       return *(m_result + index);
     }
     Op reducer(m_reducer);
@@ -1097,12 +1140,15 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
       m_device.deallocate(reducers);
   }

-  EIGEN_DEVICE_FUNC typename MakePointer_<CoeffReturnType>::Type data() const { return m_result; }
-#if defined(EIGEN_USE_SYCL)
-  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-  const Device& device() const { return m_device; }
-  const Dims& xprDims() const { return m_xpr_dims; }
+  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_result; }
+  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+  EIGEN_DEVICE_FUNC const Device& device() const { return m_device; }
+#ifdef EIGEN_USE_SYCL
+  // binding placeholder accessors to a command group handler for SYCL
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+    m_impl.bind(cgh);
+    m_result.bind(cgh);
+  }
 #endif

  private:
@@ -1126,8 +1172,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
 #endif

 #if defined(EIGEN_USE_SYCL)
-  template < typename HostExpr_, typename FunctorExpr_, typename Tuple_of_Acc_, typename Dims_, typename Op_, typename Index_> friend class TensorSycl::internal::ReductionFunctor;
-  template<typename CoeffReturnType_ ,typename OutAccessor_, typename HostExpr_, typename FunctorExpr_, typename Op_, typename Dims_, typename Index_, typename TupleType_> friend class TensorSycl::internal::FullReductionKernelFunctor;
+  template < typename Evaluator_, typename Op__> friend class TensorSycl::internal::ReductionFunctor;
+  // SYCL needs the generic reducer when the reduction algorithm is neither an inner, outer, nor full reducer
+  template <typename, typename, typename> friend struct internal::GenericReducer;
 #endif


@@ -1255,9 +1302,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
   // Precomputed strides for the output tensor.
   array<Index, NumOutputDims> m_outputStrides;
   array<internal::TensorIntDivisor<Index>, NumOutputDims> m_fastOutputStrides;
-  // Subset of strides of the input tensor for the non-reduced dimensions.
-  // Indexed by output dimensions.
-  static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
   array<Index, NumPreservedStrides> m_preservedStrides;
   // Map from output to input dimension index.
   array<Index, NumOutputDims> m_output_to_input_dim_map;
@@ -1288,13 +1332,36 @@ static const bool RunningOnGPU = false;
 static const bool RunningOnGPU = false;
 static const bool RunningOnSycl = false;
 #endif
-typename MakePointer_<CoeffReturnType>::Type m_result;
+EvaluatorPointerType m_result;

-const Device& m_device;
+const Device EIGEN_DEVICE_REF m_device;
+};

-#if defined(EIGEN_USE_SYCL)
-const Dims m_xpr_dims;
-#endif
+template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_, typename Device>
+struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
+: public TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> {
+typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device> Base;
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Device& device) : Base(op, device){}
+};


+template<typename Op, typename Dims, typename ArgType, template <class> class MakePointer_>
+struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Eigen::SyclDevice>
+: public TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Eigen::SyclDevice> {

+typedef TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Eigen::SyclDevice> Base;
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const typename Base::XprType& op, const Eigen::SyclDevice& device) : Base(op, device){}
+// The coeff function in the base class is recursive; it is not standard-layout and cannot be used inside a SYCL kernel,
+// so it is overridden here for the SYCL backend.
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Base::CoeffReturnType coeff(typename Base::Index index) const {
+return *(this->data() + index);
+}
+// The packet function in the base class is recursive; it is not standard-layout and cannot be used inside a SYCL kernel,
+// so it is overridden here for the SYCL backend.
+template<int LoadMode>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Base::PacketReturnType packet(typename Base::Index index) const {
+return internal::pload<typename Base::PacketReturnType>(this->data() + index);
+}
 };

 } // end namespace Eigen
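The two evaluators added at the end of this hunk keep the public `TensorEvaluator` interface thin and push the shared logic into `TensorReductionEvaluatorBase`; the `Eigen::SyclDevice` one additionally overrides `coeff()` and `packet()` so that, inside a kernel, reading the reduction result is a flat load from the already-materialized output buffer instead of a recursive call. A toy analogue of this base-plus-thin-specialization layout (all names invented for illustration):

// Illustration only.
struct HostDeviceTag {};
struct SyclDeviceTag {};

template <typename Device>
struct ReductionEvaluatorBaseSketch {
  float* m_result = nullptr;            // filled once the reduction has run
  float coeff_recursive(long i) const;  // fine on the host, unusable in a SYCL kernel
};

// Generic devices simply reuse the base behaviour.
template <typename Device>
struct ReductionEvaluatorSketch : ReductionEvaluatorBaseSketch<Device> {};

// SYCL: override the accessor with a flat read from the result buffer.
template <>
struct ReductionEvaluatorSketch<SyclDeviceTag>
    : ReductionEvaluatorBaseSketch<SyclDeviceTag> {
  float coeff(long i) const { return *(this->m_result + i); }
};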
@@ -44,6 +44,9 @@ class TensorLazyEvaluatorReadOnly : public TensorLazyBaseEvaluator<Dimensions, t
 public:
 // typedef typename TensorEvaluator<Expr, Device>::Dimensions Dimensions;
 typedef typename TensorEvaluator<Expr, Device>::Scalar Scalar;
+typedef StorageMemory<Scalar, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;
+typedef TensorEvaluator<Expr, Device> EvalType;

 TensorLazyEvaluatorReadOnly(const Expr& expr, const Device& device) : m_impl(expr, device), m_dummy(Scalar(0)) {
 m_dims = m_impl.dimensions();
@@ -79,6 +82,8 @@ class TensorLazyEvaluatorWritable : public TensorLazyEvaluatorReadOnly<Dimension
 public:
 typedef TensorLazyEvaluatorReadOnly<Dimensions, Expr, Device> Base;
 typedef typename Base::Scalar Scalar;
+typedef StorageMemory<Scalar, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;

 TensorLazyEvaluatorWritable(const Expr& expr, const Device& device) : Base(expr, device) {
 }
@@ -362,6 +367,8 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>
 typedef typename Derived::Scalar CoeffReturnType;
 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
 typedef typename Derived::Dimensions Dimensions;
+typedef StorageMemory<CoeffReturnType, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;

 enum {
 IsAligned = false,
@@ -379,7 +386,7 @@ struct TensorEvaluator<const TensorRef<Derived>, Device>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_ref.dimensions(); }

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
 return true;
 }

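The `StorageMemory<T, Device>` / `EvaluatorPointerType` typedefs that keep appearing in these hunks are the "abstracting the pointer type" part of the change: evaluator code is written once against `EvaluatorPointerType`, which is a plain `T*` for ordinary devices but can resolve to a SYCL-aware wrapper that carries a placeholder accessor. A minimal sketch of the idea, with simplified names that are not the real Eigen definitions:

// Illustration only.
struct CpuDeviceSketch {};
struct SyclDeviceSketch {};

template <typename T>
struct SyclPointerWrapperSketch {  // hypothetical wrapper
  T* ptr;                          // a real one would also hold/bind a placeholder accessor
};

template <typename T, typename Device>
struct StorageMemorySketch {       // default: evaluators see a raw pointer
  typedef T* Type;
};

template <typename T>
struct StorageMemorySketch<T, SyclDeviceSketch> {  // SYCL: a bindable wrapper
  typedef SyclPointerWrapperSketch<T> Type;
};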
@@ -109,6 +109,8 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
 typedef typename XprType::CoeffReturnType CoeffReturnType;
 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+typedef StorageMemory<CoeffReturnType, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;

 enum {
 IsAligned = false,
@@ -145,7 +147,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const Dimensions& dimensions() const { return m_dimensions; }

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
 m_impl.evalSubExprsIfNeeded(NULL);
 return true;
 }
@@ -158,6 +160,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
 eigen_assert(index < dimensions().TotalSize());
 Index inputIndex = 0;
 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+EIGEN_UNROLL_LOOP
 for (int i = NumDims - 1; i > 0; --i) {
 Index idx = index / m_strides[i];
 index -= idx * m_strides[i];
@@ -172,6 +175,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
 inputIndex += index;
 }
 } else {
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < NumDims - 1; ++i) {
 Index idx = index / m_strides[i];
 index -= idx * m_strides[i];
@@ -205,6 +209,7 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
 // local structure.
 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
 values[PacketSize];
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < PacketSize; ++i) {
 values[i] = coeff(index+i);
 }
@@ -225,12 +230,14 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
 TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
 }

-EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

-/// required by sycl in order to extract the accessor
-const TensorEvaluator<ArgType, Device> & impl() const { return m_impl; }
-/// added for sycl in order to construct the buffer from sycl device
-ReverseDimensions functor() const { return m_reverse; }
+#ifdef EIGEN_USE_SYCL
+// binding placeholder accessors to a command group handler for SYCL
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+m_impl.bind(cgh);
+}
+#endif

 protected:
 Dimensions m_dimensions;
@@ -285,11 +292,11 @@ struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
 // This code is pilfered from TensorMorphing.h
 EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < PacketSize; ++i) {
 this->coeffRef(index+i) = values[i];
 }
 }

 };

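`EIGEN_UNROLL_LOOP`, sprinkled in front of the short fixed-trip-count loops above, is the "SYCL macro for controlling loop unrolling" from the commit message. Its exact definition lives elsewhere in Eigen; one plausible shape of such a macro, shown only to make the intent concrete, is:

// Illustrative definition only; the real macro may differ.
#if defined(__SYCL_DEVICE_ONLY__)
#define EIGEN_UNROLL_LOOP _Pragma("unroll")
#else
#define EIGEN_UNROLL_LOOP
#endif

// Used exactly as in the evaluators above:
//   EIGEN_UNROLL_LOOP
//   for (int i = 0; i < PacketSize; ++i) { values[i] = coeff(index + i); }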
@@ -86,12 +86,15 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {

 typedef TensorScanOp<Op, ArgType> XprType;
 typedef typename XprType::Index Index;
+typedef const ArgType ChildType;
 static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
 typedef DSizes<Index, NumDims> Dimensions;
 typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
 typedef typename XprType::CoeffReturnType CoeffReturnType;
 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
 typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;
+typedef StorageMemory<Scalar, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;

 enum {
 IsAligned = false,
@@ -110,7 +113,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
 m_exclusive(op.exclusive()),
 m_accumulator(op.accumulator()),
 m_size(m_impl.dimensions()[op.axis()]),
-m_stride(1),
+m_stride(1), m_consume_dim(op.axis()),
 m_output(NULL) {

 // Accumulating a scalar isn't supported.
@@ -142,6 +145,10 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
 return m_stride;
 }

+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& consume_dim() const {
+return m_consume_dim;
+}
+
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const {
 return m_size;
 }
@@ -162,7 +169,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
 return m_device;
 }

-EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
+EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
 m_impl.evalSubExprsIfNeeded(NULL);
 ScanLauncher<Self, Op, Device> launcher;
 if (data) {
@@ -171,7 +178,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
 }

 const Index total_size = internal::array_prod(dimensions());
-m_output = static_cast<CoeffReturnType*>(m_device.allocate(total_size * sizeof(Scalar)));
+m_output = static_cast<EvaluatorPointerType>(m_device.get((Scalar*) m_device.allocate_temp(total_size * sizeof(Scalar))));
 launcher(*this, m_output);
 return true;
 }
@@ -181,7 +188,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
 return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
 }

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Eigen::internal::traits<XprType>::PointerType data() const
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const
 {
 return m_output;
 }
@@ -196,21 +203,29 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
 }

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
-if (m_output != NULL) {
-m_device.deallocate(m_output);
+if (m_output) {
+m_device.deallocate_temp(m_output);
 m_output = NULL;
 }
 m_impl.cleanup();
 }

+#ifdef EIGEN_USE_SYCL
+// binding placeholder accessors to a command group handler for SYCL
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+m_impl.bind(cgh);
+m_output.bind(cgh);
+}
+#endif
 protected:
 TensorEvaluator<ArgType, Device> m_impl;
-const Device& m_device;
+const Device EIGEN_DEVICE_REF m_device;
 const bool m_exclusive;
 Op m_accumulator;
 const Index m_size;
 Index m_stride;
-CoeffReturnType* m_output;
+Index m_consume_dim;
+EvaluatorPointerType m_output;
 };

 // CPU implementation of scan
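The scan evaluator now draws its temporary output from `allocate_temp`, converts it through `m_device.get(...)` into an `EvaluatorPointerType`, and releases it with `deallocate_temp`, so the same code path serves host memory and SYCL device memory. On the default device this path is exercised by any cumulative expression, for example:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();

  // Cumulative sum along dimension 1. Evaluating this goes through the
  // TensorScanOp evaluator shown above, which allocates the temporary
  // output buffer, runs ScanLauncher, and frees the buffer in cleanup().
  Eigen::Tensor<float, 2> scanned = t.cumsum(1);
  return 0;
}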
@@ -109,6 +109,8 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 typedef typename XprType::CoeffReturnType CoeffReturnType;
 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+typedef StorageMemory<CoeffReturnType, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;

 enum {
 IsAligned = false,
@@ -130,8 +132,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
 const Device& device)
 : m_device(device),
-m_impl(op.expression(), device),
-m_shuffle(op.shufflePermutation())
+m_impl(op.expression(), device)
 {
 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
 const Shuffle& shuffle = op.shufflePermutation();
@@ -172,7 +173,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
 m_impl.evalSubExprsIfNeeded(NULL);
 return true;
 }
@@ -194,6 +195,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 static PacketReturnType Run(const Self& self, Index index) {
 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < PacketSize; ++i) {
 values[i] = self.coeff(index + i);
 }
@@ -210,6 +212,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 return self.m_impl.template packet<LoadMode>(index);
 } else {
 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < PacketSize; ++i) {
 values[i] = self.coeff(index + i);
 }
@@ -330,13 +333,14 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 TensorOpCost(0, 0, compute_cost, m_is_identity /* vectorized */, PacketSize);
 }

-EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

-// required by sycl
-EIGEN_STRONG_INLINE const Shuffle& shufflePermutation() const {return m_shuffle;}
-// required by sycl
-EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const {return m_impl;}
+#ifdef EIGEN_USE_SYCL
+// binding placeholder accessors to a command group handler for SYCL
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+m_impl.bind(cgh);
+}
+#endif
 protected:
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(
 Index input_index,
@@ -389,10 +393,8 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 array<Index, NumDims> m_inputStrides;
 array<Index, NumDims> m_unshuffledInputStrides;

-const Device& m_device;
+const Device EIGEN_DEVICE_REF m_device;
 TensorEvaluator<ArgType, Device> m_impl;
-/// required by sycl
-Shuffle m_shuffle;
 };

@@ -444,6 +446,7 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>

 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
 internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < PacketSize; ++i) {
 this->coeffRef(index+i) = values[i];
 }
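With the SYCL-specific `impl()`/`shufflePermutation()` accessors and the stored `m_shuffle` member gone, the shuffling evaluator is reached only through the normal expression API plus `bind()`. For reference, the expression these hunks evaluate is the ordinary `shuffle()`:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 3> input(2, 3, 5);
  input.setRandom();

  // Reverse the dimension order; evaluation goes through
  // TensorEvaluator<const TensorShufflingOp<...>, DefaultDevice>.
  Eigen::array<int, 3> order{{2, 1, 0}};
  Eigen::Tensor<float, 3> output = input.shuffle(order);
  // output has dimensions (5, 3, 2)
  return 0;
}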
@@ -37,7 +37,7 @@ struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
 template<typename Strides, typename XprType>
 struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
 {
-typedef const TensorStridingOp<Strides, XprType>& type;
+typedef const TensorStridingOp<Strides, XprType>EIGEN_DEVICE_REF type;
 };

 template<typename Strides, typename XprType>
@@ -108,6 +108,8 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 typedef typename XprType::CoeffReturnType CoeffReturnType;
 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+typedef StorageMemory<CoeffReturnType, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;

 enum {
 IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
@@ -120,7 +122,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 };

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-: m_impl(op.expression(), device), m_strides(op.strides())
+: m_impl(op.expression(), device)
 {
 m_dimensions = m_impl.dimensions();
 for (int i = 0; i < NumDims; ++i) {
@@ -149,9 +151,10 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 }
 }

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType/*data*/) {
 m_impl.evalSubExprsIfNeeded(NULL);
 return true;
 }
@@ -173,6 +176,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 Index inputIndices[] = {0, 0};
 Index indices[] = {index, index + PacketSize - 1};
 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+EIGEN_UNROLL_LOOP
 for (int i = NumDims - 1; i > 0; --i) {
 const Index idx0 = indices[0] / m_outputStrides[i];
 const Index idx1 = indices[1] / m_outputStrides[i];
@@ -184,6 +188,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 inputIndices[0] += indices[0] * m_inputStrides[0];
 inputIndices[1] += indices[1] * m_inputStrides[0];
 } else { // RowMajor
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < NumDims - 1; ++i) {
 const Index idx0 = indices[0] / m_outputStrides[i];
 const Index idx1 = indices[1] / m_outputStrides[i];
@@ -203,6 +208,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
 values[0] = m_impl.coeff(inputIndices[0]);
 values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
+EIGEN_UNROLL_LOOP
 for (int i = 1; i < PacketSize-1; ++i) {
 values[i] = coeff(index+i);
 }
@@ -225,18 +231,20 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
 }

-EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

-/// required by sycl in order to extract the accessor
-const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-/// required by sycl in order to extract the accessor
-Strides functor() const { return m_strides; }
+#ifdef EIGEN_USE_SYCL
+// binding placeholder accessors to a command group handler for SYCL
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+m_impl.bind(cgh);
+}
+#endif
 protected:
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
 {
 Index inputIndex = 0;
 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+EIGEN_UNROLL_LOOP
 for (int i = NumDims - 1; i > 0; --i) {
 const Index idx = index / m_outputStrides[i];
 inputIndex += idx * m_inputStrides[i];
@@ -244,6 +252,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 }
 inputIndex += index * m_inputStrides[0];
 } else { // RowMajor
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < NumDims - 1; ++i) {
 const Index idx = index / m_outputStrides[i];
 inputIndex += idx * m_inputStrides[i];
@@ -258,7 +267,6 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
 array<Index, NumDims> m_outputStrides;
 array<Index, NumDims> m_inputStrides;
 TensorEvaluator<ArgType, Device> m_impl;
-const Strides m_strides;
 };

 // Eval as lvalue
@@ -296,11 +304,6 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
 return this->m_impl.coeffRef(this->srcCoeff(index));
 }

-/// required by sycl in order to extract the accessor
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return this->m_impl; }
-/// required by sycl in order to extract the accessor
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Strides functor() const { return this->m_strides; }

 template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 void writePacket(Index index, const PacketReturnType& x)
 {
@@ -310,6 +313,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
 Index inputIndices[] = {0, 0};
 Index indices[] = {index, index + PacketSize - 1};
 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+EIGEN_UNROLL_LOOP
 for (int i = NumDims - 1; i > 0; --i) {
 const Index idx0 = indices[0] / this->m_outputStrides[i];
 const Index idx1 = indices[1] / this->m_outputStrides[i];
@@ -321,6 +325,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
 inputIndices[0] += indices[0] * this->m_inputStrides[0];
 inputIndices[1] += indices[1] * this->m_inputStrides[0];
 } else { // RowMajor
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < NumDims - 1; ++i) {
 const Index idx0 = indices[0] / this->m_outputStrides[i];
 const Index idx1 = indices[1] / this->m_outputStrides[i];
@@ -340,6 +345,7 @@ struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
 internal::pstore<Scalar, PacketReturnType>(values, x);
 this->m_impl.coeffRef(inputIndices[0]) = values[0];
 this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
+EIGEN_UNROLL_LOOP
 for (int i = 1; i < PacketSize-1; ++i) {
 this->coeffRef(index+i) = values[i];
 }
@@ -91,6 +91,8 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
 typedef typename XprType::CoeffReturnType CoeffReturnType;
 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
 static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+typedef StorageMemory<CoeffReturnType, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;

 enum {
 IsAligned = false,
@@ -205,7 +207,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
 return m_dimensions;
 }

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
 m_impl.evalSubExprsIfNeeded(NULL);
 return true;
 }
@@ -249,6 +251,13 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
 return result;
 }

+#ifdef EIGEN_USE_SYCL
+// binding placeholder accessors to a command group handler for SYCL
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+m_impl.bind(cgh);
+}
+#endif

 protected:
 // Given the output index, finds the first index in the input tensor used to compute the trace
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
@@ -276,7 +285,7 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
 TensorEvaluator<ArgType, Device> m_impl;
 // Initialize the size of the trace dimension
 Index m_traceDim;
-const Device& m_device;
+const Device EIGEN_DEVICE_REF m_device;
 array<bool, NumInputDims> m_reduced;
 array<Index, NumReducedDims> m_reducedDims;
 array<Index, NumOutputDims> m_outputStrides;
@@ -58,9 +58,6 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
 };
 template <typename T> struct MakePointer {
 typedef T* Type;
-typedef T& RefType;
-typedef T ScalarType;
-
 };
 typedef typename MakePointer<Scalar>::Type PointerType;
 };
@@ -80,9 +77,6 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
 };
 template <typename T> struct MakePointer {
 typedef T* Type;
-typedef T& RefType;
-typedef T ScalarType;
-
 };
 typedef typename MakePointer<Scalar>::Type PointerType;
 };
@@ -106,10 +100,6 @@ struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
 // Intermediate typedef to workaround MSVC issue.
 typedef MakePointer_<T> MakePointerT;
 typedef typename MakePointerT::Type Type;
-typedef typename MakePointerT::RefType RefType;
-typedef typename MakePointerT::ScalarType ScalarType;
-
-
 };
 typedef typename MakePointer<Scalar>::Type PointerType;
 };
@@ -135,49 +125,49 @@ struct traits<TensorRef<PlainObjectType> >
 template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
 struct eval<Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
 {
-typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type;
+typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
 };

 template<typename _Scalar, int NumIndices_, int Options, typename IndexType_>
 struct eval<const Tensor<_Scalar, NumIndices_, Options, IndexType_>, Eigen::Dense>
 {
-typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>& type;
+typedef const Tensor<_Scalar, NumIndices_, Options, IndexType_>EIGEN_DEVICE_REF type;
 };

 template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
 struct eval<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense>
 {
-typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
+typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>EIGEN_DEVICE_REF type;
 };

 template<typename Scalar_, typename Dimensions, int Options, typename IndexType_>
 struct eval<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>, Eigen::Dense>
 {
-typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
+typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>EIGEN_DEVICE_REF type;
 };

 template<typename PlainObjectType, int Options, template <class> class MakePointer>
 struct eval<TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
 {
-typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
+typedef const TensorMap<PlainObjectType, Options, MakePointer>EIGEN_DEVICE_REF type;
 };

 template<typename PlainObjectType, int Options, template <class> class MakePointer>
 struct eval<const TensorMap<PlainObjectType, Options, MakePointer>, Eigen::Dense>
 {
-typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
+typedef const TensorMap<PlainObjectType, Options, MakePointer>EIGEN_DEVICE_REF type;
 };

 template<typename PlainObjectType>
 struct eval<TensorRef<PlainObjectType>, Eigen::Dense>
 {
-typedef const TensorRef<PlainObjectType>& type;
+typedef const TensorRef<PlainObjectType>EIGEN_DEVICE_REF type;
 };

 template<typename PlainObjectType>
 struct eval<const TensorRef<PlainObjectType>, Eigen::Dense>
 {
-typedef const TensorRef<PlainObjectType>& type;
+typedef const TensorRef<PlainObjectType>EIGEN_DEVICE_REF type;
 };

 // TODO nested<> does not exist anymore in Eigen/Core, and it thus has to be removed in favor of ref_selector.
@@ -189,50 +179,50 @@ template<typename T, int n=1, typename PlainObject = void> struct nested
 template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
 struct nested<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
 {
-typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type;
+typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>EIGEN_DEVICE_REF type;
 };

 template <typename Scalar_, int NumIndices_, int Options_, typename IndexType_>
 struct nested<const Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
 {
-typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>& type;
+typedef const Tensor<Scalar_, NumIndices_, Options_, IndexType_>EIGEN_DEVICE_REF type;
 };

 template <typename Scalar_, typename Dimensions, int Options, typename IndexType_>
 struct nested<TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >
 {
-typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
+typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>EIGEN_DEVICE_REF type;
 };

 template <typename Scalar_, typename Dimensions, int Options, typename IndexType_>
 struct nested<const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_> >
 {
-typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>& type;
+typedef const TensorFixedSize<Scalar_, Dimensions, Options, IndexType_>EIGEN_DEVICE_REF type;
 };


 template <typename PlainObjectType, int Options, template <class> class MakePointer>
 struct nested<TensorMap<PlainObjectType, Options, MakePointer> >
 {
-typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
+typedef const TensorMap<PlainObjectType, Options, MakePointer>EIGEN_DEVICE_REF type;
 };

 template <typename PlainObjectType, int Options, template <class> class MakePointer>
 struct nested<const TensorMap<PlainObjectType, Options, MakePointer> >
 {
-typedef const TensorMap<PlainObjectType, Options, MakePointer>& type;
+typedef const TensorMap<PlainObjectType, Options, MakePointer>EIGEN_DEVICE_REF type;
 };

 template <typename PlainObjectType>
 struct nested<TensorRef<PlainObjectType> >
 {
-typedef const TensorRef<PlainObjectType>& type;
+typedef const TensorRef<PlainObjectType>EIGEN_DEVICE_REF type;
 };

 template <typename PlainObjectType>
 struct nested<const TensorRef<PlainObjectType> >
 {
-typedef const TensorRef<PlainObjectType>& type;
+typedef const TensorRef<PlainObjectType>EIGEN_DEVICE_REF type;
 };

 } // end namespace internal
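Every `eval<>` and `nested<>` specialization above swaps a hard-coded `&` for `EIGEN_DEVICE_REF`. The motivation is that a SYCL kernel cannot capture host references, so on the device the nested expression must be held by value while the host keeps the usual by-reference nesting. A definition of roughly this shape would give that behaviour (illustrative only, not the actual macro from the Tensor headers):

// Illustrative definition only.
#if defined(SYCL_DEVICE_ONLY)
#define EIGEN_DEVICE_REF        // nested expressions held by value on the device
#else
#define EIGEN_DEVICE_REF &      // usual by-reference nesting on the host
#endif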
@@ -91,24 +91,6 @@ class TensorVolumePatchOp : public TensorBase<TensorVolumePatchOp<Planes, Rows,
 m_padding_left(padding_left), m_padding_right(padding_right),
 m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}

-#ifdef EIGEN_USE_SYCL // this is work around for sycl as Eigen could not use c++11 deligate constructor feature
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorVolumePatchOp(const XprType& expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols,
-DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides,
-DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides,
-DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
-bool padding_explicit, DenseIndex padding_top_z, DenseIndex padding_bottom_z,
-DenseIndex padding_top, DenseIndex padding_bottom, DenseIndex padding_left,
-DenseIndex padding_right, PaddingType padding_type, Scalar padding_value)
-: m_xpr(expr), m_patch_planes(patch_planes), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
-m_plane_strides(plane_strides), m_row_strides(row_strides), m_col_strides(col_strides),
-m_in_plane_strides(in_plane_strides), m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
-m_plane_inflate_strides(plane_inflate_strides), m_row_inflate_strides(row_inflate_strides),
-m_col_inflate_strides(col_inflate_strides), m_padding_explicit(padding_explicit), m_padding_top_z(padding_top_z),
-m_padding_bottom_z(padding_bottom_z), m_padding_top(padding_top), m_padding_bottom(padding_bottom), m_padding_left(padding_left),
-m_padding_right(padding_right), m_padding_type(padding_type), m_padding_value(padding_value) {}
-
-#endif

 EIGEN_DEVICE_FUNC
 DenseIndex patch_planes() const { return m_patch_planes; }
 EIGEN_DEVICE_FUNC
@@ -195,6 +177,8 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
 typedef typename XprType::CoeffReturnType CoeffReturnType;
 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
 static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
+typedef StorageMemory<CoeffReturnType, Device> Storage;
+typedef typename Storage::Type EvaluatorPointerType;

 enum {
 IsAligned = false,
@@ -205,16 +189,9 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
 CoordAccess = false,
 RawAccess = false
 };
-#ifdef __SYCL_DEVICE_ONLY__
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType op, const Device& device)
-#else
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
-#endif

-: m_impl(op.expression(), device)
-#ifdef EIGEN_USE_SYCL
-, m_op(op)
-#endif
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) :
+m_impl(op.expression(), device)
 {
 EIGEN_STATIC_ASSERT((NumDims >= 5), YOU_MADE_A_PROGRAMMING_MISTAKE);

@@ -368,9 +345,10 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
 m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
 }
 }

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
 m_impl.evalSubExprsIfNeeded(NULL);
 return true;
 }
@@ -531,14 +509,10 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
 return TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
 }

-EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return NULL; }
+EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

-#ifdef EIGEN_USE_SYCL
-// Required by SYCL in order to construct the expression on the device
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& xpr() const { return m_op; }
-#endif

 Index planePaddingTop() const { return m_planePaddingTop; }
 Index rowPaddingTop() const { return m_rowPaddingTop; }
@@ -556,10 +530,17 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D
 Index rowInflateStride() const { return m_row_inflate_strides; }
 Index colInflateStride() const { return m_col_inflate_strides; }

+#ifdef EIGEN_USE_SYCL
+// binding placeholder accessors to a command group handler for SYCL
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
+m_impl.bind(cgh);
+}
+#endif
 protected:
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
 {
 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+EIGEN_UNROLL_LOOP
 for (int i = 0; i < PacketSize; ++i) {
 values[i] = coeff(index+i);
 }
@@ -635,10 +616,6 @@ struct TensorEvaluator<const TensorVolumePatchOp<Planes, Rows, Cols, ArgType>, D

 TensorEvaluator<ArgType, Device> m_impl;

-#ifdef EIGEN_USE_SYCL
-// Required by SYCL in order to construct the expression on the device
-XprType m_op;
-#endif

 };

@@ -527,26 +527,32 @@ static void test_execute_generator_op(Device d)
 }
 }

+#ifdef EIGEN_DONT_VECTORIZE
+#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
+#else
+#define VECTORIZABLE(VAL) VAL
+#endif
+
 #define CALL_SUBTEST_PART(PART) \
 CALL_SUBTEST_##PART

 #define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, false, ColMajor>(default_device))); \
 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, true, ColMajor>(default_device))); \
-CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, true, false, ColMajor>(default_device))); \
-CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, true, true, ColMajor>(default_device))); \
+CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), false, ColMajor>(default_device))); \
+CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), true, ColMajor>(default_device))); \
 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, false, RowMajor>(default_device))); \
 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, true, RowMajor>(default_device))); \
-CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, true, false, RowMajor>(default_device))); \
-CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, true, true, RowMajor>(default_device))); \
+CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), false, RowMajor>(default_device))); \
+CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), true, RowMajor>(default_device))); \
 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, false, ColMajor>(tp_device))); \
 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, true, ColMajor>(tp_device))); \
-CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, true, false, ColMajor>(tp_device))); \
-CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, true, true, ColMajor>(tp_device))); \
+CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), false, ColMajor>(tp_device))); \
+CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), true, ColMajor>(tp_device))); \
 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, false, RowMajor>(tp_device))); \
 CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, true, RowMajor>(tp_device))); \
-CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, true, false, RowMajor>(tp_device))); \
-CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, true, true, RowMajor>(tp_device)))
+CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), false, RowMajor>(tp_device))); \
+CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), true, RowMajor>(tp_device)))

 EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
 Eigen::DefaultDevice default_device;
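The `VECTORIZABLE` helper makes the executor test respect `EIGEN_DONT_VECTORIZE`: the subtests that used to hard-code `Vectorizable = true` now collapse to the scalar path when vectorization is disabled. Expanded by hand (assuming `EIGEN_DONT_VECTORIZE` is defined to a non-zero value such as 1 when it is set):

// With EIGEN_DONT_VECTORIZE defined:
//   VECTORIZABLE(true)  ->  !EIGEN_DONT_VECTORIZE && true  ->  false
// Without it:
//   VECTORIZABLE(true)  ->  true
//
// So, for example,
//   CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), false, ColMajor>(default_device)));
// instantiates the non-vectorized variant in builds where vectorization is off.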
@@ -51,7 +51,8 @@ static void test_static_reshape() {

 // New dimensions: [2, 3, 7]
 Eigen::IndexList<type2index<2>, type2index<3>, type2index<7>> dim;
-Tensor<float, 3> reshaped = tensor.reshape(dim);
+Tensor<float, 3> reshaped = tensor.reshape(static_cast<Eigen::DSizes<long,3>>(dim));
+

 for (int i = 0; i < 2; ++i) {
 for (int j = 0; j < 3; ++j) {