Adding Sycl backend for TensorImagePatchOP.h; adding Sycl backend for TensorInflation.h.

2025-07-27 15:22:00 +08:00 · 2017-02-20 12:11:05 +00:00 · 2017-02-20 12:11:05 +00:00 · 79ebc8f761
commit 79ebc8f761
parent 91982b91c0
11 changed files with 1356 additions and 12 deletions
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@ -70,12 +70,8 @@ class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprT
                                                           DenseIndex in_row_strides, DenseIndex in_col_strides,
                                                           DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
                                                           PaddingType padding_type, Scalar padding_value)
-      : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
+      : TensorImagePatchOp(expr, patch_rows, patch_cols, row_strides,col_strides, in_row_strides, in_col_strides, row_inflate_strides,
-        m_row_strides(row_strides), m_col_strides(col_strides),
+         col_inflate_strides, 0,0,0,0,padding_value, padding_type, false ){}
        m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
        m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
        m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0),
        m_padding_type(padding_type), m_padding_value(padding_value) {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols,
                                                           DenseIndex row_strides, DenseIndex col_strides,
@ -83,14 +79,15 @@ class TensorImagePatchOp : public TensorBase<TensorImagePatchOp<Rows, Cols, XprT
                                                           DenseIndex row_inflate_strides, DenseIndex col_inflate_strides,
                                                           DenseIndex padding_top, DenseIndex padding_bottom,
                                                           DenseIndex padding_left, DenseIndex padding_right,
-                                                           Scalar padding_value)
+                                                           Scalar padding_value, PaddingType padding_type=PADDING_VALID,
                                                           bool padding_explicit=true)
      : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols),
        m_row_strides(row_strides), m_col_strides(col_strides),
        m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides),
        m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides),
-        m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
+        m_padding_explicit(padding_explicit), m_padding_top(padding_top), m_padding_bottom(padding_bottom),
        m_padding_left(padding_left), m_padding_right(padding_right),
-        m_padding_type(PADDING_VALID), m_padding_value(padding_value) {}
+        m_padding_type(padding_type), m_padding_value(padding_value) {}
    EIGEN_DEVICE_FUNC
    DenseIndex patch_rows() const { return m_patch_rows; }
@ -172,7 +169,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
  };
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device)
+      : m_impl(op.expression(), device), m_op(op)
  {
    EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
@ -241,6 +238,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
          break;
        default:
          eigen_assert(false && "unexpected padding");
          m_outputCols=0; // silence the uninitialised warnig;
          m_outputRows=0; //// silence the uninitialised warnig;
      }
    }
    eigen_assert(m_outputRows > 0);
@ -420,7 +419,10 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
  // required by sycl
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& xpr() const { return m_op; }
  Index rowPaddingTop() const { return m_rowPaddingTop; }
  Index colPaddingLeft() const { return m_colPaddingLeft; }
@ -501,6 +503,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
  Scalar m_paddingValue;
  TensorEvaluator<ArgType, Device> m_impl;
  // required for sycl
  const XprType& m_op;
 };
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h
@ -215,6 +215,12 @@ struct TensorEvaluator<const TensorInflationOp<Strides, ArgType>, Device>
  EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
  /// required by sycl in order to extract the accessor
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
  /// required by sycl in order to extract the accessor
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE  const Strides& functor() const { return m_strides; }
 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
@ -165,6 +165,20 @@ KERNELBROKERCONVERTCHIPPINGOP()
 /// specialisation of the \ref ConvertToDeviceExpression struct when the node type is TensorImagePatchOp
 #define KERNELBROKERCONVERTIMAGEPATCHOP(CVQual)\
 template<DenseIndex Rows, DenseIndex Cols, typename XprType>\
 struct ConvertToDeviceExpression<CVQual TensorImagePatchOp<Rows, Cols, XprType> >{\
  typedef CVQual TensorImagePatchOp<Rows, Cols, typename ConvertToDeviceExpression<XprType>::Type> Type;\
 };
 KERNELBROKERCONVERTIMAGEPATCHOP(const)
 KERNELBROKERCONVERTIMAGEPATCHOP()
 #undef KERNELBROKERCONVERTIMAGEPATCHOP
 }  // namespace internal
 }  // namespace TensorSycl
 }  // namespace Eigen
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
@ -385,6 +385,24 @@ SYCLTENSORCHIPPINGOPEXPR(const)
 SYCLTENSORCHIPPINGOPEXPR()
 #undef SYCLTENSORCHIPPINGOPEXPR
 // TensorImagePatchOp
 #define SYCLTENSORIMAGEPATCHOPEXPR(CVQual)\
 template<DenseIndex Rows, DenseIndex Cols, typename OrigXprType, typename XprType, typename... Params>\
 struct  ExprConstructor<CVQual TensorImagePatchOp<Rows, Cols, OrigXprType>, CVQual TensorImagePatchOp<Rows, Cols, XprType>, Params... > {\
  typedef ExprConstructor<OrigXprType, XprType, Params...> my_xpr_type;\
  typedef CVQual TensorImagePatchOp<Rows, Cols, typename my_xpr_type::Type> Type;\
  my_xpr_type xprExpr;\
  Type expr;\
  template <typename FuncDetector>\
  ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple<Params...> &t)\
  : xprExpr(funcD.xprExpr, t), expr(xprExpr.expr, funcD.m_patch_rows, funcD.m_patch_cols, funcD.m_row_strides, funcD.m_col_strides,\
    funcD.m_in_row_strides, funcD.m_in_col_strides, funcD.m_row_inflate_strides, funcD.m_col_inflate_strides, \
    funcD.m_padding_top, funcD.m_padding_bottom, funcD.m_padding_left, funcD.m_padding_right, funcD.m_padding_value, funcD.m_padding_type, funcD.m_padding_explicit){}\
 };
 SYCLTENSORIMAGEPATCHOPEXPR(const)
 SYCLTENSORIMAGEPATCHOPEXPR()
 #undef SYCLTENSORIMAGEPATCHOPEXPR
 // TensorLayoutSwapOp
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
@ -226,6 +226,20 @@ SYCLTENSORCHIPPINGOPEXTACC()
 #undef SYCLTENSORCHIPPINGOPEXTACC
 // specialisation of the \ref ExtractAccessor struct when the node type is
 /// TensorImagePatchOp.
 #define SYCLTENSORIMAGEPATCHOPEXTACC(CVQual)\
 template<DenseIndex Rows, DenseIndex Cols, typename XprType, typename Dev>\
 struct ExtractAccessor<TensorEvaluator<CVQual TensorImagePatchOp<Rows, Cols, XprType>, Dev> >{\
  static inline auto getTuple(cl::sycl::handler& cgh, const TensorEvaluator<CVQual TensorImagePatchOp<Rows, Cols, XprType>, Dev>& eval)\
  RETURN_CPP11(AccessorConstructor::getTuple(cgh, eval.impl()))\
 };
 SYCLTENSORIMAGEPATCHOPEXTACC(const)
 SYCLTENSORIMAGEPATCHOPEXTACC()
 #undef SYCLTENSORIMAGEPATCHOPEXTACC
 // specialisation of the \ref ExtractAccessor struct when the node type is
 /// TensorLayoutSwapOp.
 #define SYCLTENSORLAYOUTSWAPOPEXTACC(CVQual)\
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h
@ -296,7 +296,7 @@ SYCLEXTRFUNCCONTRACTCONCAT(TensorConcatenationOp, axis(),)
 //TensorChippingOp
 #define SYCLEXTRFUNCCHIPPINGOP(CVQual)\
 template<DenseIndex DimId, typename XprType, typename Device>\
-struct FunctorExtractor<TensorEvaluator<CVQual TensorChippingOp<DimId, XprType>, Device>>{\
+struct FunctorExtractor<TensorEvaluator<CVQual TensorChippingOp<DimId, XprType>, Device> >{\
  FunctorExtractor<Eigen::TensorEvaluator<XprType, Device> > xprExpr;\
  const DenseIndex m_dim;\
  const DenseIndex m_offset;\
@ -310,6 +310,40 @@ SYCLEXTRFUNCCHIPPINGOP(const)
 SYCLEXTRFUNCCHIPPINGOP()
 #undef SYCLEXTRFUNCCHIPPINGOP
 #define SYCLEXTRFUNCIMAGEPATCHOP(CVQual)\
 template<DenseIndex Rows, DenseIndex Cols, typename XprType, typename Device>\
 struct FunctorExtractor<TensorEvaluator<CVQual TensorImagePatchOp<Rows, Cols, XprType>, Device> >{\
 typedef CVQual TensorImagePatchOp<Rows, Cols, XprType> Self;\
 FunctorExtractor<Eigen::TensorEvaluator<XprType, Device> > xprExpr;\
 const DenseIndex m_patch_rows;\
 const DenseIndex m_patch_cols;\
 const DenseIndex m_row_strides;\
 const DenseIndex m_col_strides;\
 const DenseIndex m_in_row_strides;\
 const DenseIndex m_in_col_strides;\
 const DenseIndex m_row_inflate_strides;\
 const DenseIndex m_col_inflate_strides;\
 const bool m_padding_explicit;\
 const DenseIndex m_padding_top;\
 const DenseIndex m_padding_bottom;\
 const DenseIndex m_padding_left;\
 const DenseIndex m_padding_right;\
 const PaddingType m_padding_type;\
 const typename Self::Scalar m_padding_value;\
 FunctorExtractor(const TensorEvaluator<Self, Device>& expr)\
 : xprExpr(expr.impl()), m_patch_rows(expr.xpr().patch_rows()), m_patch_cols(expr.xpr().patch_cols()),\
  m_row_strides(expr.xpr().row_strides()), m_col_strides(expr.xpr().col_strides()),\
  m_in_row_strides(expr.xpr().in_row_strides()), m_in_col_strides(expr.xpr().in_col_strides()),\
  m_row_inflate_strides(expr.xpr().row_inflate_strides()), m_col_inflate_strides(expr.xpr().col_inflate_strides()),\
  m_padding_explicit(expr.xpr().padding_explicit()),m_padding_top(expr.xpr().padding_top()),\
  m_padding_bottom(expr.xpr().padding_bottom()), m_padding_left(expr.xpr().padding_left()),\
  m_padding_right(expr.xpr().padding_right()), m_padding_type(expr.xpr().padding_type()),\
  m_padding_value(expr.xpr().padding_value()){}\
 };
 SYCLEXTRFUNCIMAGEPATCHOP(const)
 SYCLEXTRFUNCIMAGEPATCHOP()
 #undef SYCLEXTRFUNCIMAGEPATCHOP
 /// template deduction function for FunctorExtractor
 template <typename Evaluator>
 auto inline extractFunctors(const Evaluator& evaluator)-> FunctorExtractor<Evaluator> {
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h
@ -161,6 +161,16 @@ SLICESTRIDEOPLEAFCOUNT()
 #undef SLICESTRIDEOPLEAFCOUNT
 #define TENSORIMAGEPATCHOPLEAFCOUNT(CVQual)\
 template<DenseIndex Rows, DenseIndex Cols, typename XprType>\
 struct LeafCount<CVQual TensorImagePatchOp<Rows, Cols, XprType> >:CategoryCount<XprType>{};
 TENSORIMAGEPATCHOPLEAFCOUNT(const)
 TENSORIMAGEPATCHOPLEAFCOUNT()
 #undef TENSORIMAGEPATCHOPLEAFCOUNT
 } /// namespace TensorSycl
 } /// namespace internal
 } /// namespace Eigen
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
@ -221,6 +221,20 @@ SYCLSLICESTRIDEOPPLH()
 #undef SYCLSLICESTRIDEOPPLH
 /// specialisation of the \ref PlaceHolderExpression when the node is
 /// TensorImagePatchOp
 #define SYCLTENSORIMAGEPATCHOP(CVQual)\
 template<DenseIndex Rows, DenseIndex Cols, typename XprType, size_t N>\
 struct PlaceHolderExpression<CVQual TensorImagePatchOp<Rows, Cols, XprType>, N> {\
  typedef CVQual TensorImagePatchOp<Rows, Cols, typename CalculateIndex <N, XprType>::ArgType> Type;\
 };
 SYCLTENSORIMAGEPATCHOP(const)
 SYCLTENSORIMAGEPATCHOP()
 #undef SYCLTENSORIMAGEPATCHOP
 /// template deduction for \ref PlaceHolderExpression struct
 template <typename Expr>
 struct createPlaceHolderExpression {
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@ -169,6 +169,8 @@ if(EIGEN_TEST_CXX11)
    ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_patch_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_layout_swap_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_image_patchOP_sycl "-std=c++11")
    ei_add_test_sycl(cxx11_tensor_inflation_sycl "-std=c++11")
  endif(EIGEN_TEST_SYCL)
  # It should be safe to always run these tests as there is some fallback code for
  # older compiler that don't support cxx11.
--- a/unsupported/test/cxx11_tensor_image_patchOP_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_image_patchOP_sycl.cpp
--- a/unsupported/test/cxx11_tensor_inflation_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_inflation_sycl.cpp
@ -0,0 +1,136 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2016
 // Mehdi Goli    Codeplay Software Ltd.
 // Ralph Potter  Codeplay Software Ltd.
 // Luke Iwanski  Codeplay Software Ltd.
 // Contact: <eigen@codeplay.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_inflation_sycl
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
 #define EIGEN_USE_SYCL
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 using Eigen::Tensor;
 // Inflation Defenition for each dimention the inflated val would be
 //((dim-1)*strid[dim] +1)
 // for 1 dimnention vector of size 3 with value (4,4,4) with the inflated stride value of 3 would be changed to
 // tensor of size (2*3) +1 = 7 with the value of
 // (4, 0, 0, 4, 0, 0, 4).
 template <typename DataType, int DataLayout, typename IndexType>
 void test_simple_inflation_sycl(const Eigen::SyclDevice &sycl_device) {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, DataLayout,IndexType> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout,IndexType> no_stride(tensorRange);
  tensor.setRandom();
  array<IndexType, 4> strides;
  strides[0] = 1;
  strides[1] = 1;
  strides[2] = 1;
  strides[3] = 1;
  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
  DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
  DataType* gpu_data_no_stride  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_no_stride(gpu_data_no_stride, tensorRange);
  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
  gpu_no_stride.device(sycl_device)=gpu_tensor.inflate(strides);
  sycl_device.memcpyDeviceToHost(no_stride.data(), gpu_data_no_stride, tensorBuffSize);
  VERIFY_IS_EQUAL(no_stride.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(no_stride.dimension(1), sizeDim2);
  VERIFY_IS_EQUAL(no_stride.dimension(2), sizeDim3);
  VERIFY_IS_EQUAL(no_stride.dimension(3), sizeDim4);
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
        }
      }
    }
  }
  strides[0] = 2;
  strides[1] = 4;
  strides[2] = 2;
  strides[3] = 3;
  IndexType inflatedSizeDim1 = 3;
  IndexType inflatedSizeDim2 = 9;
  IndexType inflatedSizeDim3 = 9;
  IndexType inflatedSizeDim4 = 19;
  array<IndexType, 4> inflatedTensorRange = {{inflatedSizeDim1, inflatedSizeDim2, inflatedSizeDim3, inflatedSizeDim4}};
  Tensor<DataType, 4, DataLayout, IndexType> inflated(inflatedTensorRange);
  const size_t inflatedTensorBuffSize =inflated.size()*sizeof(DataType);
  DataType* gpu_data_inflated  = static_cast<DataType*>(sycl_device.allocate(inflatedTensorBuffSize));
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_inflated(gpu_data_inflated, inflatedTensorRange);
  gpu_inflated.device(sycl_device)=gpu_tensor.inflate(strides);
  sycl_device.memcpyDeviceToHost(inflated.data(), gpu_data_inflated, inflatedTensorBuffSize);
  VERIFY_IS_EQUAL(inflated.dimension(0), inflatedSizeDim1);
  VERIFY_IS_EQUAL(inflated.dimension(1), inflatedSizeDim2);
  VERIFY_IS_EQUAL(inflated.dimension(2), inflatedSizeDim3);
  VERIFY_IS_EQUAL(inflated.dimension(3), inflatedSizeDim4);
  for (IndexType i = 0; i < inflatedSizeDim1; ++i) {
    for (IndexType j = 0; j < inflatedSizeDim2; ++j) {
      for (IndexType k = 0; k < inflatedSizeDim3; ++k) {
        for (IndexType l = 0; l < inflatedSizeDim4; ++l) {
          if (i % strides[0] == 0 &&
              j % strides[1] == 0 &&
              k % strides[2] == 0 &&
              l % strides[3] == 0) {
            VERIFY_IS_EQUAL(inflated(i,j,k,l),
                            tensor(i/strides[0], j/strides[1], k/strides[2], l/strides[3]));
          } else {
            VERIFY_IS_EQUAL(0, inflated(i,j,k,l));
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_tensor);
  sycl_device.deallocate(gpu_data_no_stride);
  sycl_device.deallocate(gpu_data_inflated);
 }
 template<typename DataType, typename dev_Selector> void sycl_inflation_test_per_device(dev_Selector s){
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_simple_inflation_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_simple_inflation_sycl<DataType, ColMajor, int64_t>(sycl_device);
 }
 void test_cxx11_tensor_inflation_sycl()
 {
  for (const auto& device :Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_inflation_test_per_device<float>(device));
  }
 }