commit 1c8b9e10a7
Author: Eugene Zhulenev
Date:   2018-08-08 16:57:58 -07:00

    Merged with upstream eigen

22 changed files with 190 additions and 65 deletions

Eigen/Core

@@ -200,6 +200,12 @@ using std::ptrdiff_t;
   #include "src/Core/arch/GPU/MathFunctions.h"
 #endif
+#if defined EIGEN_VECTORIZE_SYCL
+  #include "src/Core/arch/SYCL/InteropHeaders.h"
+  #include "src/Core/arch/SYCL/PacketMath.h"
+  #include "src/Core/arch/SYCL/MathFunctions.h"
+  #include "src/Core/arch/SYCL/TypeCasting.h"
+#endif
 #include "src/Core/arch/Default/Settings.h"
 #include "src/Core/functors/TernaryFunctors.h"

Eigen/src/Core/arch/GPU/Half.h

@@ -83,7 +83,11 @@ struct __half_raw {
 #if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
 // In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
 typedef __half __half_raw;
-#endif
+#endif // defined(EIGEN_HAS_CUDA_FP16)
+#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+typedef cl::sycl::half __half_raw;
 #endif
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
@@ -200,6 +204,7 @@ struct half : public half_impl::half_base {
     x = other.x;
     return *this;
   }
 };
 } // end namespace Eigen

Eigen/src/Core/util/Macros.h

@@ -571,20 +571,19 @@
 // Does the compiler fully support const expressions? (as in c++14)
 #ifndef EIGEN_HAS_CONSTEXPR
 #if defined(EIGEN_CUDACC)
 // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
   #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
     #define EIGEN_HAS_CONSTEXPR 1
   #endif
 #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
   (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \
   (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L)))
 #define EIGEN_HAS_CONSTEXPR 1
 #endif
 #ifndef EIGEN_HAS_CONSTEXPR
 #define EIGEN_HAS_CONSTEXPR 0
 #endif
 #endif // EIGEN_HAS_CONSTEXPR
@@ -643,9 +642,12 @@
 #ifdef __CUDACC_RELAXED_CONSTEXPR__
 #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
 #endif
-#elif defined(__clang__) && defined(__CUDA__)
-// clang++ always considers constexpr functions as implicitly __host__ __device__
-#define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
+// See bug 1580: clang/CUDA fails to make the following calls
+// to constexpr bool std::equal_to::operator() even when
+// EIGEN_CONSTEXPR_ARE_DEVICE_FUNC is defined in c++14 only.
+// #elif defined(__clang__) && defined(__CUDA__) && EIGEN_HAS_CONSTEXPR == 1
+// // clang++ always considers constexpr functions as implicitly __host__ __device__
+// #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC
 #endif
 #endif
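For reference, Eigen bug 1580 concerns constexpr standard-library calls made from device code: the now-disabled branch relied on clang treating constexpr functions as implicitly __host__ __device__. A minimal sketch of the failing pattern the comment describes (illustrative CUDA code, not from the commit):

    #include <functional>

    // Bug-1580 pattern (sketch): std::equal_to<int>::operator() is constexpr
    // in C++14, and the commented-out branch assumed clang would therefore
    // allow calling it from device code; the report says this still fails.
    __device__ bool equal_on_device(int a, int b) {
      return std::equal_to<int>()(a, b);  // constexpr call from device code
    }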
@@ -1076,11 +1078,13 @@ namespace Eigen {
 # endif
 #endif
-#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
+#if EIGEN_HAS_VARIADIC_TEMPLATES
 // The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input.
 namespace Eigen {
 namespace internal {
-bool all(){ return true; }
+inline bool all(){ return true; }
 template<typename T, typename ...Ts>
 bool all(T t, Ts ... ts){ return t && all(ts...); }
@@ -1088,5 +1092,15 @@ bool all(T t, Ts ... ts){ return t && all(ts...); }
 }
 #endif
+// Wrapping #pragma unroll in a macro since it is required for SYCL
+#if defined(__SYCL_DEVICE_ONLY__)
+#if defined(_MSC_VER)
+#define EIGEN_UNROLL_LOOP __pragma(unroll)
+#else
+#define EIGEN_UNROLL_LOOP _Pragma("unroll")
+#endif
+#else
+#define EIGEN_UNROLL_LOOP
+#endif
 #endif // EIGEN_MACROS_H
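The new macro is meant to be written immediately before a loop: it expands to __pragma(unroll) (MSVC) or _Pragma("unroll") only under SYCL device compilation, and to nothing everywhere else, so the annotation costs nothing on other compilers. A usage sketch (the function is illustrative, not from the commit):

    #include <Eigen/Core>

    // With a SYCL device compiler the loop below gets an unroll pragma; on
    // every other compiler EIGEN_UNROLL_LOOP expands to nothing.
    template <typename Scalar, int N>
    Scalar dot_fixed(const Scalar* a, const Scalar* b) {
      Scalar acc = Scalar(0);
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < N; ++i) {
        acc += a[i] * b[i];
      }
      return acc;
    }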

test/dynalloc.cpp

@@ -15,6 +15,7 @@
 #define ALIGNMENT 1
 #endif
+typedef Matrix<float,16,1> Vector16f;
 typedef Matrix<float,8,1> Vector8f;
 void check_handmade_aligned_malloc()

@@ -70,7 +71,7 @@ struct MyStruct
 {
   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
   char dummychar;
-  Vector8f avec;
+  Vector16f avec;
 };

@@ -78,7 +79,7 @@ class MyClassA
 public:
   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
   char dummychar;
-  Vector8f avec;
+  Vector16f avec;
 };
 template<typename T> void check_dynaligned()

@@ -145,6 +146,7 @@ EIGEN_DECLARE_TEST(dynalloc)
   CALL_SUBTEST(check_dynaligned<Vector4d>() );
   CALL_SUBTEST(check_dynaligned<Vector4i>() );
   CALL_SUBTEST(check_dynaligned<Vector8f>() );
+  CALL_SUBTEST(check_dynaligned<Vector16f>() );
 }
 {

test/main.h

@@ -193,7 +193,7 @@ namespace Eigen
 #define EIGEN_DEFAULT_IO_FORMAT IOFormat(4, 0, " ", "\n", "", "", "", "")
-#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__)
+#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) && !defined(__SYCL_DEVICE_ONLY__)
 #define EIGEN_EXCEPTIONS
 #endif

@@ -272,7 +272,7 @@ namespace Eigen
 }
 #endif //EIGEN_EXCEPTIONS
-#elif !defined(__CUDACC__) && !defined(__HIPCC__)// EIGEN_DEBUG_ASSERTS
+#elif !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__) // EIGEN_DEBUG_ASSERTS
 // see bug 89. The copy_bool here is working around a bug in gcc <= 4.3
 #define eigen_assert(a) \
 if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\

@@ -329,7 +329,7 @@ namespace Eigen
 std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n";
 #endif
-#if !defined(__CUDACC__) && !defined(__HIPCC__)
+#if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__)
 #define EIGEN_USE_CUSTOM_ASSERT
 #endif

unsupported/Eigen/CXX11/src/Tensor/TensorBase.h

@@ -538,8 +538,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
 // Fourier transforms
 template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>
-fft(const FFT& fft) const {
-  return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft);
+fft(const FFT& dims) const {
+  return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), dims);
 }
 // Scan.

@@ -723,8 +723,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
 template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const TensorBroadcastingOp<const Broadcast, const Derived>
-broadcast(const Broadcast& broadcast) const {
-  return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast);
+broadcast(const Broadcast& bcast) const {
+  return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), bcast);
 }
 template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE

@@ -832,8 +832,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
 }
 template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const TensorShufflingOp<const Shuffle, const Derived>
-shuffle(const Shuffle& shuffle) const {
-  return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
+shuffle(const Shuffle& shfl) const {
+  return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl);
 }
 template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const TensorStridingOp<const Strides, const Derived>

@@ -1030,13 +1030,13 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
 template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 const TensorShufflingOp<const Shuffle, const Derived>
-shuffle(const Shuffle& shuffle) const {
-  return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
+shuffle(const Shuffle& shfl) const {
+  return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl);
 }
 template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 TensorShufflingOp<const Shuffle, Derived>
-shuffle(const Shuffle& shuffle) {
-  return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
+shuffle(const Shuffle& shfl) {
+  return TensorShufflingOp<const Shuffle, Derived>(derived(), shfl);
 }
 template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE

@@ -1052,8 +1052,8 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
 // Select the device on which to evaluate the expression.
 template <typename DeviceType>
-TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
-  return TensorDevice<Derived, DeviceType>(device, derived());
+TensorDevice<Derived, DeviceType> device(const DeviceType& dev) {
+  return TensorDevice<Derived, DeviceType>(dev, derived());
 }
 protected:
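All of these are pure renames: each parameter previously carried the same name as the member function it belonged to, hiding the function inside its own body. A minimal illustration of the shadowing (hypothetical struct, not from the commit):

    struct Tensorish {
      int shuffle(int shuffle) {
        // Inside this body "shuffle" names the int parameter, so the method
        // can no longer be called by name: shuffle(shuffle) is an error.
        return shuffle;
      }
    };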

unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h

@@ -89,7 +89,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
 // policy if block shapes/sizes conflict).
 *block_shape = resources[0].block_shape;
 *block_total_size = resources[0].block_total_size;
-for (int i = 1; i < resources.size(); ++i) {
+for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
   if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims &&
       *block_shape != TensorBlockShapeType::kSkewedInnerDims) {
     *block_shape = TensorBlockShapeType::kSkewedInnerDims;
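The new index type fixes a signed/unsigned mismatch: resources.size() returns the vector's unsigned size_type, and comparing it against an int draws -Wsign-compare. A standalone illustration:

    #include <vector>

    void walk(const std::vector<int>& v) {
      // for (int i = 1; i < v.size(); ++i) {}  // warns: signed vs unsigned
      for (std::vector<int>::size_type i = 1; i < v.size(); ++i) {
        // same-signed comparison, no warning
      }
    }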

unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h

@@ -274,8 +274,8 @@ struct TensorContractionEvaluatorBase
       op.lhsExpression(), op.rhsExpression()), device),
   m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),
       op.rhsExpression(), op.lhsExpression()), device),
+  m_output_kernel(op.outputKernel()),
   m_device(device),
-  m_output_kernel(op.outputKernel()),
   m_result(NULL) {
 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
     static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)),
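Moving m_output_kernel ahead of m_device makes the initializer list match the order in which the members are declared; C++ always initializes members in declaration order regardless of how the list is written, and a mismatched list triggers -Wreorder. A minimal illustration (hypothetical struct, not from the commit):

    struct Evaluator {
      int m_output_kernel;  // declared first, so initialized first
      int m_device;

      // Listing m_device before m_output_kernel would not change the actual
      // initialization order, but it would draw a -Wreorder warning.
      Evaluator(int kernel, int device)
          : m_output_kernel(kernel), m_device(device) {}
    };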

unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h

@@ -527,8 +527,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
 Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
 typedef TensorEvalToOp<const KernelArgType> EvalTo;
 EvalTo evalToTmp(local, m_kernelArg);
-const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
-internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
+const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
+internal::TensorExecutor<const EvalTo, Device, Vectorize>::run(evalToTmp, m_device);
 m_kernel = local;
 m_local_kernel = true;

@@ -786,7 +786,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
 };
 EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)
-    : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
+    : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
 {
 EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);

unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h

@@ -91,18 +91,31 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
   }
 }
+// An abstract interface to a device specific memory allocator.
+class Allocator {
+ public:
+  virtual ~Allocator() {}
+  EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0;
+  EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0;
+};
 // Build a thread pool device on top the an existing pool of threads.
 struct ThreadPoolDevice {
   // The ownership of the thread pool remains with the caller.
-  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { }
+  ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr)
+      : pool_(pool), num_threads_(num_cores), allocator_(allocator) { }
   EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
-    return internal::aligned_malloc(num_bytes);
+    return allocator_ ? allocator_->allocate(num_bytes)
+                      : internal::aligned_malloc(num_bytes);
   }
   EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
-    internal::aligned_free(buffer);
+    if (allocator_) {
+      allocator_->deallocate(buffer);
+    } else {
+      internal::aligned_free(buffer);
+    }
   }
   EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {

@@ -275,9 +288,13 @@ struct ThreadPoolDevice {
   // Thread pool accessor.
   ThreadPoolInterface* getPool() const { return pool_; }
+  // Allocator accessor.
+  Allocator* allocator() const { return allocator_; }
  private:
   ThreadPoolInterface* pool_;
   int num_threads_;
+  Allocator* allocator_;
 };
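With this change callers can route every ThreadPoolDevice allocation through their own Allocator; omitting the third constructor argument keeps the old aligned_malloc path. A usage sketch (the counting allocator is illustrative, mirroring the TestAllocator added to the test file below):

    #define EIGEN_USE_THREADS
    #include <unsupported/Eigen/CXX11/Tensor>

    // Pass-through allocator that counts allocations.
    class CountingAllocator : public Eigen::Allocator {
     public:
      ~CountingAllocator() override {}
      EIGEN_DEVICE_FUNC void* allocate(size_t num_bytes) const override {
        ++count_;  // count_ is mutable so the const interface can update it
        return Eigen::internal::aligned_malloc(num_bytes);
      }
      EIGEN_DEVICE_FUNC void deallocate(void* buffer) const override {
        Eigen::internal::aligned_free(buffer);
      }
      int count() const { return count_; }
     private:
      mutable int count_ = 0;
    };

    int main() {
      Eigen::ThreadPool pool(4);
      CountingAllocator counting;
      Eigen::ThreadPoolDevice device(&pool, /*num_cores=*/4, &counting);
      void* p = device.allocate(512);  // routed through CountingAllocator
      device.deallocate(p);
      return counting.count() == 1 ? 0 : 1;
    }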

unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h

@@ -126,7 +126,7 @@ struct TensorEvaluator
 }
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-    std::vector<internal::TensorOpResourceRequirements>* resources) const {}
+    std::vector<internal::TensorOpResourceRequirements>*) const {}
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
   assert(m_data != NULL);

@@ -255,7 +255,7 @@ struct TensorEvaluator<const Derived, Device>
 }
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
-    std::vector<internal::TensorOpResourceRequirements>* resources) const {}
+    std::vector<internal::TensorOpResourceRequirements>*) const {}
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
   assert(m_data != NULL);

unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h

@@ -124,8 +124,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
 }
 typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
 EvalTo evalToTmp(m_buffer, m_op);
-const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
-internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device);
+const bool Vectorize = internal::IsVectorizable<Device, const ArgType>::value;
+internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, Vectorize>::run(evalToTmp, m_device);
 return true;
 }
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {

unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h

@@ -21,6 +21,7 @@ namespace Eigen {
 template<typename T> struct MakePointer {
   typedef T* Type;
   typedef T& RefType;
+  typedef T ScalarType;
 };
 namespace internal{

@@ -97,7 +98,7 @@ template<typename XprType> class TensorForcedEvalOp;
 template<typename ExpressionType, typename DeviceType> class TensorDevice;
 template<typename Derived, typename Device> struct TensorEvaluator;
-class NoOpOutputKernel;
+struct NoOpOutputKernel;
 struct DefaultDevice;
 struct ThreadPoolDevice;

unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h

@@ -61,8 +61,8 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
 typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
 typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle)
-    : m_xpr(expr), m_shuffle(shuffle) {}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl)
+    : m_xpr(expr), m_shuffle(shfl) {}
 EIGEN_DEVICE_FUNC
 const Shuffle& shufflePermutation() const { return m_shuffle; }

unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h

@@ -273,11 +273,11 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
 Dimensions m_dimensions;
 TensorEvaluator<ArgType, Device> m_impl;
+// Initialize the size of the trace dimension
+Index m_traceDim;
 const Device& m_device;
 array<bool, NumInputDims> m_reduced;
 array<Index, NumReducedDims> m_reducedDims;
-// Initialize the size of the trace dimension
-Index m_traceDim;
 array<Index, NumOutputDims> m_outputStrides;
 array<Index, NumReducedDims> m_reducedStrides;
 array<Index, NumOutputDims> m_preservedStrides;

unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h

@@ -59,6 +59,7 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
 template <typename T> struct MakePointer {
   typedef T* Type;
   typedef T& RefType;
+  typedef T ScalarType;
 };
 typedef typename MakePointer<Scalar>::Type PointerType;

@@ -80,6 +81,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
 template <typename T> struct MakePointer {
   typedef T* Type;
   typedef T& RefType;
+  typedef T ScalarType;
 };
 typedef typename MakePointer<Scalar>::Type PointerType;

@@ -105,6 +107,8 @@ struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
   typedef MakePointer_<T> MakePointerT;
   typedef typename MakePointerT::Type Type;
   typedef typename MakePointerT::RefType RefType;
+  typedef typename MakePointerT::ScalarType ScalarType;
 };
 typedef typename MakePointer<Scalar>::Type PointerType;

unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h

@@ -684,10 +684,15 @@ template<typename DerType> struct NumTraits<AutoDiffScalar<DerType> >
 }
 namespace std {
 template <typename T>
 class numeric_limits<Eigen::AutoDiffScalar<T> >
   : public numeric_limits<typename T::Scalar> {};
+
+template <typename T>
+class numeric_limits<Eigen::AutoDiffScalar<T&> >
+  : public numeric_limits<typename T::Scalar> {};
 } // namespace std
 #endif // EIGEN_AUTODIFF_SCALAR_H
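The added specialization covers AutoDiffScalar types whose derivative type is a reference, which nested autodiff expressions produce. The generic AutoDiffScalar<T> specialization matches such a type with T deduced as a reference and then fails on T::Scalar; the T& form strips the reference first. A sketch (types illustrative, not from the commit):

    #include <limits>
    #include <Eigen/Dense>
    #include <unsupported/Eigen/AutoDiff>

    // Without the T& specialization this instantiation would pick the
    // AutoDiffScalar<T> form with T = Eigen::VectorXd& and fail to compile.
    typedef Eigen::AutoDiffScalar<Eigen::VectorXd&> ADRef;
    static_assert(std::numeric_limits<ADRef>::is_specialized,
                  "inherits numeric_limits<double> via VectorXd::Scalar");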

unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h

@@ -193,6 +193,8 @@ struct lgamma_impl<float> {
 #if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
   int dummy;
   return ::lgammaf_r(x, &dummy);
+#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+  return cl::sycl::lgamma(x);
 #else
   return ::lgammaf(x);
 #endif

@@ -206,6 +208,8 @@ struct lgamma_impl<double> {
 #if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
   int dummy;
   return ::lgamma_r(x, &dummy);
+#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+  return cl::sycl::lgamma(x);
 #else
   return ::lgamma(x);
 #endif

@@ -423,13 +427,25 @@ struct erf_retval {
 template <>
 struct erf_impl<float> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); }
+  static EIGEN_STRONG_INLINE float run(float x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::erf(x);
+#else
+    return ::erff(x);
+#endif
+  }
 };
 template <>
 struct erf_impl<double> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); }
+  static EIGEN_STRONG_INLINE double run(double x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::erf(x);
+#else
+    return ::erf(x);
+#endif
+  }
 };
 #endif // EIGEN_HAS_C99_MATH

@@ -456,13 +472,25 @@ struct erfc_retval {
 template <>
 struct erfc_impl<float> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
+  static EIGEN_STRONG_INLINE float run(const float x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::erfc(x);
+#else
+    return ::erfcf(x);
+#endif
+  }
 };
 template <>
 struct erfc_impl<double> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
+  static EIGEN_STRONG_INLINE double run(const double x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+    return cl::sycl::erfc(x);
+#else
+    return ::erfc(x);
+#endif
+  }
 };
 #endif // EIGEN_HAS_C99_MATH

unsupported/test/cxx11_tensor_concatenation.cpp

@@ -50,7 +50,13 @@ static void test_static_dimension_failure()
     .reshape(Tensor<int, 3>::Dimensions(2, 3, 1))
     .concatenate(right, 0);
   Tensor<int, 2, DataLayout> alternative = left
-    .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{{2, 3}}}), 0);
+    // Clang compiler break with {{{}}} with an ambigous error on copy constructor
+    // the variadic DSize constructor added for #ifndef EIGEN_EMULATE_CXX11_META_H.
+    // Solution:
+    // either the code should change to
+    //  Tensor<int, 2>::Dimensions{{2, 3}}
+    //  or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}}
+    .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{2, 3}}), 0);
 }
 template<int DataLayout>

unsupported/test/cxx11_tensor_thread_pool.cpp

@@ -16,6 +16,25 @@
 using Eigen::Tensor;
+class TestAllocator : public Allocator {
+ public:
+  ~TestAllocator() override {}
+  EIGEN_DEVICE_FUNC void* allocate(size_t num_bytes) const override {
+    const_cast<TestAllocator*>(this)->alloc_count_++;
+    return internal::aligned_malloc(num_bytes);
+  }
+  EIGEN_DEVICE_FUNC void deallocate(void* buffer) const override {
+    const_cast<TestAllocator*>(this)->dealloc_count_++;
+    internal::aligned_free(buffer);
+  }
+  int alloc_count() const { return alloc_count_; }
+  int dealloc_count() const { return dealloc_count_; }
+ private:
+  int alloc_count_ = 0;
+  int dealloc_count_ = 0;
+};
 void test_multithread_elementwise()
 {

@@ -374,14 +393,14 @@ void test_multithread_random()
 }
 template<int DataLayout>
-void test_multithread_shuffle()
+void test_multithread_shuffle(Allocator* allocator)
 {
   Tensor<float, 4, DataLayout> tensor(17,5,7,11);
   tensor.setRandom();
   const int num_threads = internal::random<int>(2, 11);
   ThreadPool threads(num_threads);
-  Eigen::ThreadPoolDevice device(&threads, num_threads);
+  Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
   Tensor<float, 4, DataLayout> shuffle(7,5,11,17);
   array<ptrdiff_t, 4> shuffles = {{2,1,3,0}};

@@ -398,6 +417,21 @@ void test_multithread_shuffle()
 }
 }
+void test_threadpool_allocate(TestAllocator* allocator)
+{
+  const int num_threads = internal::random<int>(2, 11);
+  const int num_allocs = internal::random<int>(2, 11);
+  ThreadPool threads(num_threads);
+  Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
+  for (int a = 0; a < num_allocs; ++a) {
+    void* ptr = device.allocate(512);
+    device.deallocate(ptr);
+  }
+  VERIFY(allocator != nullptr);
+  VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs);
+  VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs);
+}
 EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool)
 {

@@ -424,6 +458,9 @@ EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool)
   CALL_SUBTEST_6(test_memcpy());
   CALL_SUBTEST_6(test_multithread_random());
-  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>());
-  CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>());
+  TestAllocator test_allocator;
+  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>(nullptr));
+  CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>(&test_allocator));
+  CALL_SUBTEST_6(test_threadpool_allocate(&test_allocator));
 }

unsupported/test/cxx11_tensor_trace.cpp

@@ -37,7 +37,7 @@ static void test_all_dimensions_trace() {
   VERIFY_IS_EQUAL(result1(), sum);
   Tensor<float, 5, DataLayout> tensor2(7, 7, 7, 7, 7);
-  array<ptrdiff_t, 5> dims({{2, 1, 0, 3, 4}});
+  array<ptrdiff_t, 5> dims = { { 2, 1, 0, 3, 4 } };
   Tensor<float, 0, DataLayout> result2 = tensor2.trace(dims);
   VERIFY_IS_EQUAL(result2.rank(), 0);
   sum = 0.0f;

@@ -52,7 +52,7 @@ template <int DataLayout>
 static void test_simple_trace() {
   Tensor<float, 3, DataLayout> tensor1(3, 5, 3);
   tensor1.setRandom();
-  array<ptrdiff_t, 2> dims1({{0, 2}});
+  array<ptrdiff_t, 2> dims1 = { { 0, 2 } };
   Tensor<float, 1, DataLayout> result1 = tensor1.trace(dims1);
   VERIFY_IS_EQUAL(result1.rank(), 1);
   VERIFY_IS_EQUAL(result1.dimension(0), 5);

@@ -67,7 +67,7 @@ static void test_simple_trace() {
   Tensor<float, 4, DataLayout> tensor2(5, 5, 7, 7);
   tensor2.setRandom();
-  array<ptrdiff_t, 2> dims2({{2, 3}});
+  array<ptrdiff_t, 2> dims2 = { { 2, 3 } };
   Tensor<float, 2, DataLayout> result2 = tensor2.trace(dims2);
   VERIFY_IS_EQUAL(result2.rank(), 2);
   VERIFY_IS_EQUAL(result2.dimension(0), 5);

@@ -82,7 +82,7 @@ static void test_simple_trace() {
     }
   }
-  array<ptrdiff_t, 2> dims3({{1, 0}});
+  array<ptrdiff_t, 2> dims3 = { { 1, 0 } };
   Tensor<float, 2, DataLayout> result3 = tensor2.trace(dims3);
   VERIFY_IS_EQUAL(result3.rank(), 2);
   VERIFY_IS_EQUAL(result3.dimension(0), 7);

@@ -99,7 +99,7 @@ static void test_simple_trace() {
   Tensor<float, 5, DataLayout> tensor3(3, 7, 3, 7, 3);
   tensor3.setRandom();
-  array<ptrdiff_t, 3> dims4({{0, 2, 4}});
+  array<ptrdiff_t, 3> dims4 = { { 0, 2, 4 } };
   Tensor<float, 2, DataLayout> result4 = tensor3.trace(dims4);
   VERIFY_IS_EQUAL(result4.rank(), 2);
   VERIFY_IS_EQUAL(result4.dimension(0), 7);

@@ -116,7 +116,7 @@ static void test_simple_trace() {
   Tensor<float, 5, DataLayout> tensor4(3, 7, 4, 7, 5);
   tensor4.setRandom();
-  array<ptrdiff_t, 2> dims5({{1, 3}});
+  array<ptrdiff_t, 2> dims5 = { { 1, 3 } };
   Tensor<float, 3, DataLayout> result5 = tensor4.trace(dims5);
   VERIFY_IS_EQUAL(result5.rank(), 3);
   VERIFY_IS_EQUAL(result5.dimension(0), 3);

@@ -140,7 +140,7 @@ template<int DataLayout>
 static void test_trace_in_expr() {
   Tensor<float, 4, DataLayout> tensor(2, 3, 5, 3);
   tensor.setRandom();
-  array<ptrdiff_t, 2> dims({{1, 3}});
+  array<ptrdiff_t, 2> dims = { { 1, 3 } };
   Tensor<float, 2, DataLayout> result(2, 5);
   result = result.constant(1.0f) - tensor.trace(dims);
   VERIFY_IS_EQUAL(result.rank(), 2);