Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-11 19:29:02 +08:00)

Misc improvements and cleanups

This commit is contained in:
parent 4c70b0a762
commit 99d75235a9
@@ -359,7 +359,7 @@ pmadd(const Packet& a,

/** \internal \returns a packet version of \a *from.
 * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
template<typename Packet, int LoadMode>
inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
{
  if(LoadMode == Aligned)
    return pload<Packet>(from);

@@ -370,7 +370,7 @@ inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)

/** \internal copy the packet \a from to \a *to.
 * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet, int LoadMode>
inline void pstoret(Scalar* to, const Packet& from)
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
{
  if(LoadMode == Aligned)
    pstore(to, from);

@@ -378,6 +378,17 @@ inline void pstoret(Scalar* to, const Packet& from)
    pstoreu(to, from);
}

/** \internal \returns a packet version of \a *from.
 * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
 * hardware if available to speedup the loading of data that won't be modified
 * by the current computation.
 */
template<typename Packet, int LoadMode>
inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
{
  return ploadt<Packet, LoadMode>(from);
}

/** \internal default implementation of palign() allowing partial specialization */
template<int Offset,typename PacketType>
struct palign_impl
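Editorial note: the generic ploadt_ro above simply forwards to ploadt; the read-only path only materializes where a target specializes it (a later hunk in this commit reads through __ldg in TensorEvaluator::coeff on CUDA). A purely hypothetical sketch of what such a specialization could look like for a scalar float "packet", assuming compute capability 3.5+ (this overload is illustrative, not part of the commit):

// Hypothetical sketch, not part of this commit: route the read-only load
// through the non-coherent read-only data cache on devices that have one.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ploadt_ro<float, Unaligned>(const float* from)
{
  return __ldg(from);
}
#endif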
@@ -30,6 +30,10 @@
#include <cstring>
#include <stdint.h>

#ifdef EIGEN_USE_THREADS
#include <future>
#endif

#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
#include <curand_kernel.h>
#endif
@ -66,6 +66,11 @@ template<std::size_t I, class T, std::size_t N> constexpr inline T& array_
|
||||
template<std::size_t I, class T, std::size_t N> constexpr inline T&& array_get(std::array<T,N>&& a) { return (T&&) STD_GET_ARR_HACK; }
|
||||
template<std::size_t I, class T, std::size_t N> constexpr inline T const& array_get(std::array<T,N> const& a) { return (T const&) STD_GET_ARR_HACK; }
|
||||
|
||||
template<std::size_t I, class T> constexpr inline T& array_get(std::vector<T>& a) { return a[I]; }
|
||||
template<std::size_t I, class T> constexpr inline T&& array_get(std::vector<T>&& a) { return a[I]; }
|
||||
template<std::size_t I, class T> constexpr inline T const& array_get(std::vector<T> const& a) { return a[I]; }
|
||||
|
||||
|
||||
#undef STD_GET_ARR_HACK
|
||||
|
||||
template <typename T> struct array_size;
|
||||
|
@ -48,7 +48,8 @@ template <typename T, size_t n> class array {
|
||||
values[2] = v3;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4) {
|
||||
EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3,
|
||||
const T& v4) {
|
||||
EIGEN_STATIC_ASSERT(n==4, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
values[0] = v1;
|
||||
values[1] = v2;
|
||||
@ -56,7 +57,8 @@ template <typename T, size_t n> class array {
|
||||
values[3] = v4;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4, const T& v5) {
|
||||
EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
|
||||
const T& v5) {
|
||||
EIGEN_STATIC_ASSERT(n==5, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
values[0] = v1;
|
||||
values[1] = v2;
|
||||
@ -64,6 +66,43 @@ template <typename T, size_t n> class array {
|
||||
values[3] = v4;
|
||||
values[4] = v5;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
|
||||
const T& v5, const T& v6) {
|
||||
EIGEN_STATIC_ASSERT(n==6, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
values[0] = v1;
|
||||
values[1] = v2;
|
||||
values[2] = v3;
|
||||
values[3] = v4;
|
||||
values[4] = v5;
|
||||
values[5] = v6;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE array(const T& v1, const T& v2, const T& v3, const T& v4,
|
||||
const T& v5, const T& v6, const T& v7) {
|
||||
EIGEN_STATIC_ASSERT(n==7, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
values[0] = v1;
|
||||
values[1] = v2;
|
||||
values[2] = v3;
|
||||
values[3] = v4;
|
||||
values[4] = v5;
|
||||
values[5] = v6;
|
||||
values[6] = v7;
|
||||
}
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE array(
|
||||
const T& v1, const T& v2, const T& v3, const T& v4,
|
||||
const T& v5, const T& v6, const T& v7, const T& v8) {
|
||||
EIGEN_STATIC_ASSERT(n==8, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
values[0] = v1;
|
||||
values[1] = v2;
|
||||
values[2] = v3;
|
||||
values[3] = v4;
|
||||
values[4] = v5;
|
||||
values[5] = v6;
|
||||
values[6] = v7;
|
||||
values[7] = v8;
|
||||
}
|
||||
|
||||
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
array(std::initializer_list<T> l) {
|
||||
@ -93,9 +132,11 @@ template<typename T, typename Tail=empty_list> struct type_list {
|
||||
|
||||
struct null_type { };
|
||||
|
||||
template<typename T1 = null_type, typename T2 = null_type, typename T3 = null_type, typename T4 = null_type, typename T5 = null_type>
|
||||
template<typename T1 = null_type, typename T2 = null_type, typename T3 = null_type,
|
||||
typename T4 = null_type, typename T5 = null_type, typename T6 = null_type,
|
||||
typename T7 = null_type, typename T8 = null_type>
|
||||
struct make_type_list {
|
||||
typedef typename make_type_list<T2, T3, T4, T5>::type tailresult;
|
||||
typedef typename make_type_list<T2, T3, T4, T5, T6, T7, T8>::type tailresult;
|
||||
|
||||
typedef type_list<T1, tailresult> type;
|
||||
};
|
||||
@ -150,6 +191,23 @@ template<typename T, T V> struct gen_numeric_list_repeated<T, 5, V> {
|
||||
typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
|
||||
};
|
||||
|
||||
template<typename T, T V> struct gen_numeric_list_repeated<T, 6, V> {
|
||||
typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
|
||||
type2val<T, V>, type2val<T, V>, type2val<T, V> >::type type;
|
||||
};
|
||||
|
||||
template<typename T, T V> struct gen_numeric_list_repeated<T, 7, V> {
|
||||
typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
|
||||
type2val<T, V>, type2val<T, V>, type2val<T, V>,
|
||||
type2val<T, V> >::type type;
|
||||
};
|
||||
|
||||
template<typename T, T V> struct gen_numeric_list_repeated<T, 8, V> {
|
||||
typedef typename make_type_list<type2val<T, V>, type2val<T, V>, type2val<T, V>,
|
||||
type2val<T, V>, type2val<T, V>, type2val<T, V>,
|
||||
type2val<T, V>, type2val<T, V> >::type type;
|
||||
};
|
||||
|
||||
|
||||
template <std::size_t index, class NList> struct get;
|
||||
|
||||
@ -174,6 +232,7 @@ template <> struct arg_prod<empty_list> {
|
||||
static const int value = 1;
|
||||
};
|
||||
|
||||
|
||||
template<int n, typename t>
|
||||
array<t, n> repeat(t v) {
|
||||
array<t, n> array;
|
||||
@ -190,6 +249,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Head::type array_get(const type_l
|
||||
return get<I, type_list<Head, Tail> >::value;
|
||||
}
|
||||
|
||||
template <class NList>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NList::HeadType::type array_prod(const NList& l) {
|
||||
return arg_prod<NList>::value;
|
||||
};
|
||||
|
||||
template<std::size_t n, typename t>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, n>& a) {
|
||||
t prod = 1;
|
||||
@ -201,6 +265,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const array<t, 0>& /*a*/) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename t>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE t array_prod(const std::vector<t>& a) {
|
||||
eigen_assert(a.size() > 0);
|
||||
t prod = 1;
|
||||
for (size_t i = 0; i < a.size(); ++i) { prod *= a[i]; }
|
||||
return prod;
|
||||
}
|
||||
|
||||
template<std::size_t I, class T, std::size_t N>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(array<T,N>& a) {
|
||||
return a[I];
|
||||
@ -210,12 +282,31 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) {
|
||||
return a[I];
|
||||
}
|
||||
|
||||
template<std::size_t I, class T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& array_get(std::vector<T>& a) {
|
||||
return a[I];
|
||||
}
|
||||
template<std::size_t I, class T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const std::vector<T>& a) {
|
||||
return a[I];
|
||||
}
|
||||
|
||||
template <typename T> struct array_size;
|
||||
template<class T, std::size_t N> struct array_size<array<T,N> > {
|
||||
static const size_t value = N;
|
||||
};
|
||||
template <typename T> struct array_size;
|
||||
template<class T, std::size_t N> struct array_size<array<T,N>& > {
|
||||
static const size_t value = N;
|
||||
};
|
||||
template <typename T> struct array_size;
|
||||
template<class T, std::size_t N> struct array_size<const array<T,N> > {
|
||||
static const size_t value = N;
|
||||
};
|
||||
|
||||
template <typename T> struct array_size;
|
||||
template<class T, std::size_t N> struct array_size<const array<T,N>& > {
|
||||
static const size_t value = N;
|
||||
};
|
||||
|
||||
struct sum_op {
|
||||
template<typename A, typename B> static inline bool run(A a, B b) { return a + b; }
|
||||
|
@ -131,8 +131,8 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
|
||||
m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
|
||||
static const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned;
|
||||
static const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? Aligned : Unaligned;
|
||||
const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned;
|
||||
const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? Aligned : Unaligned;
|
||||
m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
|
||||
|
@ -30,6 +30,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
typedef Scalar CoeffReturnType;
|
||||
typedef typename internal::packet_traits<Scalar>::type PacketReturnType;
|
||||
|
||||
// Dimensions
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return derived().dimensions()[n]; }
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Index size() const { return internal::array_prod(derived().dimensions()); }
|
||||
|
||||
// Nullary operators
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived>
|
||||
@ -187,7 +193,7 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
}
|
||||
|
||||
// Contractions.
|
||||
typedef std::pair<Index, Index> DimensionPair;
|
||||
typedef Eigen::IndexPair<Index> DimensionPair;
|
||||
|
||||
template<typename OtherDerived, typename Dimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
const TensorContractionOp<const Dimensions, const Derived, const OtherDerived>
|
||||
|
@ -48,7 +48,7 @@ struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorB
|
||||
|
||||
|
||||
template<typename Broadcast, typename XprType>
|
||||
class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, WriteAccessors>
|
||||
class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors>
|
||||
{
|
||||
public:
|
||||
typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar;
|
||||
@ -91,7 +91,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
: m_impl(op.expression(), device)
|
||||
{
|
||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||
@ -141,7 +141,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const D
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
|
||||
|
||||
@ -161,7 +161,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const D
|
||||
if (innermostLoc + packetSize <= m_impl.dimensions()[0]) {
|
||||
return m_impl.template packet<Unaligned>(inputIndex);
|
||||
} else {
|
||||
EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
|
||||
EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
|
||||
values[0] = m_impl.coeff(inputIndex);
|
||||
for (int i = 1; i < packetSize; ++i) {
|
||||
values[i] = coeff(originalIndex+i);
|
||||
|
@ -872,11 +872,19 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
|
||||
{
|
||||
assert(m_buf);
|
||||
assert(index < m_dimensions.TotalSize());
|
||||
eigen_assert(m_buf);
|
||||
eigen_assert(index < m_dimensions.TotalSize());
|
||||
return m_buf[index];
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const
|
||||
{
|
||||
eigen_assert(m_buf);
|
||||
eigen_assert(index < m_dimensions.TotalSize());
|
||||
return internal::ploadt<PacketReturnType, LoadMode>(m_buf+index);
|
||||
}
|
||||
|
||||
private:
|
||||
// No assignment (copies are needed by the kernels)
|
||||
TensorEvaluator& operator = (const TensorEvaluator&);
|
||||
|
@@ -38,6 +38,18 @@ template <typename ExpressionType, typename DeviceType> class TensorDevice {
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) {
      typedef typename OtherDerived::Scalar Scalar;
      typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum;
      Sum sum(m_expression, other);
      typedef TensorAssignOp<ExpressionType, const Sum> Assign;
      Assign assign(m_expression, sum);
      static const bool Vectorize = TensorEvaluator<const Assign, DeviceType>::PacketAccess;
      internal::TensorExecutor<const Assign, DeviceType, Vectorize>::run(assign, m_device);
      return *this;
    }

  protected:
    const DeviceType& m_device;
    ExpressionType& m_expression;

@@ -58,6 +70,18 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPool
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) {
      typedef typename OtherDerived::Scalar Scalar;
      typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum;
      Sum sum(m_expression, other);
      typedef TensorAssignOp<ExpressionType, const Sum> Assign;
      Assign assign(m_expression, sum);
      static const bool Vectorize = TensorEvaluator<const Assign, ThreadPoolDevice>::PacketAccess;
      internal::TensorExecutor<const Assign, ThreadPoolDevice, Vectorize>::run(assign, m_device);
      return *this;
    }

  protected:
    const ThreadPoolDevice& m_device;
    ExpressionType& m_expression;

@@ -79,6 +103,17 @@ template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) {
      typedef typename OtherDerived::Scalar Scalar;
      typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum;
      Sum sum(m_expression, other);
      typedef TensorAssignOp<ExpressionType, const Sum> Assign;
      Assign assign(m_expression, sum);
      internal::TensorExecutor<const Assign, GpuDevice, false>::run(assign, m_device);
      return *this;
    }

  protected:
    const GpuDevice& m_device;
    ExpressionType m_expression;
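The operator+= overloads added above are what lets compound assignment run through .device(); the tests added later in this commit exercise exactly this pattern. A minimal usage sketch (EIGEN_USE_THREADS is required for the thread-pool device):

#define EIGEN_USE_THREADS
#include <Eigen/CXX11/Tensor>

void compound_assign_example() {
  Eigen::Tensor<float, 3> in(2, 3, 7), out(2, 3, 7);
  in.setRandom();
  out.setRandom();

  Eigen::ThreadPoolDevice device(4);   // 4 worker threads
  out.device(device) += in * 3.14f;    // routed through TensorExecutor by the new operator+=
}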
@@ -37,23 +37,41 @@ struct DefaultDevice {

// Multiple cpu cores
// We should really use a thread pool here but first we need to find a portable thread pool library.
#ifdef EIGEN_USE_THREADS

typedef std::future<void> Future;

struct ThreadPoolDevice {
  ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { }
  size_t numThreads() const { return num_threads_; }
  ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : num_threads_(num_cores) { }

  EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
    return internal::aligned_malloc(num_bytes);
  }

  EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
    internal::aligned_free(buffer);
  }

  EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
    ::memcpy(dst, src, n);
  }

  EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
    ::memset(buffer, c, n);
  }

  EIGEN_STRONG_INLINE size_t numThreads() const {
    return num_threads_;
  }

  template <class Function, class... Args>
  EIGEN_STRONG_INLINE Future enqueue(Function&& f, Args&&... args) const {
    return std::async(std::launch::async, f, args...);
  }
  template <class Function, class... Args>
  EIGEN_STRONG_INLINE void enqueueNoFuture(Function&& f, Args&&... args) const {
    std::async(std::launch::async, f, args...);
  }

 private:
  // todo: NUMA, ...
  size_t num_threads_;
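enqueue() hands std::async a callable plus its arguments and returns the resulting std::future<void> (the Future typedef above); enqueueNoFuture() is the fire-and-forget variant. The executor change further down switches from calling std::async directly to going through this API. A small illustrative sketch of the same fan-out/wait pattern outside Eigen's executor (the worker function here is made up for the example):

#include <algorithm>
#include <future>
#include <vector>

static void scale_range(float* data, int begin, int end) {
  for (int i = begin; i < end; ++i) data[i] *= 2.0f;   // arbitrary per-element work
}

static void scale_all(const Eigen::ThreadPoolDevice& device, float* data, int n) {
  const int nthreads  = static_cast<int>(device.numThreads());
  const int blocksize = (std::max)(1, n / nthreads);
  std::vector<std::future<void> > results;
  for (int begin = 0; begin < n; begin += blocksize) {
    const int end = (std::min)(n, begin + blocksize);
    results.push_back(device.enqueue(&scale_range, data, begin, end));
  }
  for (auto& r : results) r.get();   // enqueue() returns std::future<void>, so wait on each block
}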
@@ -63,40 +81,33 @@ struct ThreadPoolDevice {

// GPU offloading
#ifdef EIGEN_USE_GPU
static int m_numMultiProcessors = 0;
static int m_maxThreadsPerBlock = 0;
static int m_maxThreadsPerMultiProcessor = 0;
static cudaDeviceProp m_deviceProperties;
static bool m_devicePropInitialized = false;

static void initializeDeviceProp() {
  if (!m_devicePropInitialized) {
    assert(cudaGetDeviceProperties(&m_deviceProperties, 0) == cudaSuccess);
    m_devicePropInitialized = true;
  }
}

static inline int getNumCudaMultiProcessors() {
  if (m_numMultiProcessors == 0) {
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, 0);
    m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
    m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
    m_numMultiProcessors = deviceProp.multiProcessorCount;
  }
  return m_numMultiProcessors;
  initializeDeviceProp();
  return m_deviceProperties.multiProcessorCount;
}
static inline int maxCudaThreadsPerBlock() {
  if (m_maxThreadsPerBlock == 0) {
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, 0);
    m_numMultiProcessors = deviceProp.multiProcessorCount;
    m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
    m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
  }
  return m_maxThreadsPerBlock;
  initializeDeviceProp();
  return m_deviceProperties.maxThreadsPerBlock;
}
static inline int maxCudaThreadsPerMultiProcessor() {
  if (m_maxThreadsPerBlock == 0) {
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, 0);
    m_numMultiProcessors = deviceProp.multiProcessorCount;
    m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
    m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
  }
  return m_maxThreadsPerMultiProcessor;
  initializeDeviceProp();
  return m_deviceProperties.maxThreadsPerMultiProcessor;
}
static inline int sharedMemPerBlock() {
  initializeDeviceProp();
  return m_deviceProperties.sharedMemPerBlock;
}


struct GpuDevice {
  // The cudastream is not owned: the caller is responsible for its initialization and eventual destruction.

@@ -141,8 +152,8 @@ struct GpuDevice {
#endif
  }

  EIGEN_STRONG_INLINE size_t numThreads() const {
    // Fixme:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
    // FIXME
    return 32;
  }
@@ -29,7 +29,7 @@ namespace Eigen {
 * \sa Tensor
 */

// Can't use std::pairs on cuda devices
// Can't use std::pair on cuda devices
template <typename Index> struct IndexPair {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) { }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Index f, Index s) : first(f), second(s) { }
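IndexPair is the device-friendly replacement for std::pair; TensorBase::DimensionPair is switched to it earlier in this commit, and the updated contraction tests build their dimension lists from it. A usage sketch mirroring those tests:

#include <Eigen/CXX11/Tensor>

void contraction_example() {
  Eigen::Tensor<float, 4> t_left(30, 50, 8, 31);
  Eigen::Tensor<float, 5> t_right(8, 31, 7, 20, 10);
  Eigen::Tensor<float, 5> t_result(30, 50, 7, 20, 10);
  t_left.setRandom();
  t_right.setRandom();

  typedef Eigen::Tensor<float, 1>::DimensionPair DimPair;   // now Eigen::IndexPair<Index>
  Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
  t_result = t_left.contract(t_right, dims);                // contract dims 2,3 of lhs with 0,1 of rhs
}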
@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
|
||||
m_buffer[i] = m_impl.coeff(i);
|
||||
}
|
||||
EIGEN_STRONG_INLINE void evalPacket(Index i) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
|
||||
internal::pstoret<Scalar, Packet, Aligned>(m_buffer + i, m_impl.template packet<TensorEvaluator<ArgType, Device>::IsAligned ? Aligned : Unaligned>(i));
|
||||
}
|
||||
|
||||
|
@ -65,13 +65,13 @@ struct TensorEvaluator
|
||||
return m_data[index];
|
||||
}
|
||||
|
||||
template<int LoadMode> EIGEN_STRONG_INLINE
|
||||
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
PacketReturnType packet(Index index) const
|
||||
{
|
||||
return internal::ploadt<Packet, LoadMode>(m_data + index);
|
||||
}
|
||||
|
||||
template <int StoreMode> EIGEN_STRONG_INLINE
|
||||
template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
void writePacket(Index index, const Packet& x)
|
||||
{
|
||||
return internal::pstoret<Scalar, Packet, StoreMode>(m_data + index, x);
|
||||
@ -113,13 +113,17 @@ struct TensorEvaluator<const Derived, Device>
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||
eigen_assert(m_data);
|
||||
#ifdef __CUDA_ARCH__
|
||||
return __ldg(m_data+index);
|
||||
#else
|
||||
return m_data[index];
|
||||
#endif
|
||||
}
|
||||
|
||||
template<int LoadMode> EIGEN_STRONG_INLINE
|
||||
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
PacketReturnType packet(Index index) const
|
||||
{
|
||||
return internal::ploadt<Packet, LoadMode>(m_data + index);
|
||||
return internal::ploadt_ro<Packet, LoadMode>(m_data + index);
|
||||
}
|
||||
|
||||
const Scalar* data() const { return m_data; }
|
||||
@ -166,7 +170,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(index);
|
||||
}
|
||||
@ -219,7 +223,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
|
||||
}
|
||||
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
|
||||
}
|
||||
@ -278,7 +282,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
|
||||
return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
|
||||
}
|
||||
template<int LoadMode>
|
||||
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
|
||||
}
|
||||
@ -340,7 +344,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
|
||||
return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
|
||||
}
|
||||
template<int LoadMode>
|
||||
PacketReturnType packet(Index index) const
|
||||
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
|
||||
{
|
||||
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
internal::Selector<PacketSize> select;
|
||||
|
@@ -10,10 +10,6 @@
#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H

#ifdef EIGEN_USE_THREADS
#include <future>
#endif

namespace Eigen {

/** \class TensorExecutor

@@ -62,7 +58,7 @@ class TensorExecutor<Expression, DefaultDevice, true>
  {
    const Index size = array_prod(evaluator.dimensions());
    static const int PacketSize = unpacket_traits<typename TensorEvaluator<Expression, DefaultDevice>::PacketReturnType>::size;
    const int VectorizedSize = (size / PacketSize) * PacketSize;
    const Index VectorizedSize = (size / PacketSize) * PacketSize;

    for (Index i = 0; i < VectorizedSize; i += PacketSize) {
      evaluator.evalPacket(i);

@@ -131,10 +127,10 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
      const Index numblocks = size / blocksize;

      Index i = 0;
      std::vector<std::future<void> > results;
      std::vector<Future> results;
      results.reserve(numblocks);
      for (int i = 0; i < numblocks; ++i) {
        results.push_back(std::async(std::launch::async, &EvalRange<Evaluator, Index>::run, &evaluator, i*blocksize, (i+1)*blocksize));
        results.push_back(device.enqueue(&EvalRange<Evaluator, Index>::run, &evaluator, i*blocksize, (i+1)*blocksize));
      }

      for (int i = 0; i < numblocks; ++i) {

@@ -154,11 +150,31 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
// GPU: the evaluation of the expression is offloaded to a GPU.
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
template <typename Evaluator>
__global__ void EigenMetaKernel(Evaluator eval, unsigned int size) {
__global__ void
__launch_bounds__(1024)
EigenMetaKernel(Evaluator eval, unsigned int size) {

  const int first_index = blockIdx.x * blockDim.x + threadIdx.x;
  const int step_size = blockDim.x * gridDim.x;
  for (int i = first_index; i < size; i += step_size) {
    eval.evalScalar(i);

  if (!Evaluator::PacketAccess || !Evaluator::IsAligned) {
    // Use the scalar path
    for (int i = first_index; i < size; i += step_size) {
      eval.evalScalar(i);
    }
  }
  else {
    // Use the vector path
    const int PacketSize = unpacket_traits<typename Evaluator::PacketReturnType>::size;
    const int vectorized_step_size = step_size * PacketSize;
    const int vectorized_size = (size / PacketSize) * PacketSize;
    int i = first_index * PacketSize;
    for ( ; i < vectorized_size; i += vectorized_step_size) {
      eval.evalPacket(i);
    }
    for ( ; i < size; i += step_size) {
      eval.evalScalar(i);
    }
  }
}
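The launch site of EigenMetaKernel is outside this hunk. As a rough, assumption-laden sketch, the device-property helpers added earlier in this commit could be used to size the launch so the device stays saturated; the heuristic below is illustrative, not code from this commit:

// Illustrative only: pick a block size within the __launch_bounds__(1024) cap and
// enough blocks to cover every resident thread on the device.
template <typename Evaluator>
void launch_eigen_meta_kernel(const Evaluator& evaluator, unsigned int size) {
  const int block_size = maxCudaThreadsPerBlock();
  const int num_blocks = getNumCudaMultiProcessors() *
                         maxCudaThreadsPerMultiProcessor() / block_size;
  EigenMetaKernel<Evaluator><<<num_blocks, block_size>>>(evaluator, size);
}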
@ -17,7 +17,7 @@ namespace Eigen {
|
||||
*
|
||||
* \brief The fixed sized version of the tensor class.
|
||||
*
|
||||
* The fixes sized equivalent of
|
||||
* The fixed sized equivalent of
|
||||
* Eigen::Tensor<float, 3> t(3, 5, 7);
|
||||
* is
|
||||
* Eigen::TensorFixedSize<float, Size<3,5,7>> t;
|
||||
@ -41,7 +41,7 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_,
|
||||
|
||||
enum {
|
||||
IsAligned = bool(EIGEN_ALIGN),
|
||||
PacketAccess = true,
|
||||
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
|
||||
};
|
||||
|
||||
typedef Dimensions_ Dimensions;
|
||||
|
@@ -31,30 +31,34 @@ namespace internal {
template <typename T>
struct TensorIntDivisor {
 public:
  TensorIntDivisor() {
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
    multiplier = 0;
    shift1 = 0;
    shift2 = 0;
  }

  // Must have 1 <= divider <= 2^31-1
  TensorIntDivisor(const T divider) {
    static const int N = 32;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) {
    const int N = 32;
    eigen_assert(divider > 0);
    eigen_assert(divider <= (1<<(N-1)) - 1);

    // fast ln2
#ifndef __CUDA_ARCH__
    const int leading_zeros = __builtin_clz(divider);
    const int l = N - (leading_zeros+1);
#else
    const int leading_zeros = __clz(divider);
#endif
    const int log_div = N - (leading_zeros+1);

    multiplier = (static_cast<uint64_t>(1) << (N+l)) / divider - (static_cast<uint64_t>(1) << N) + 1;
    shift1 = (std::min)(1, l);
    shift2 = (std::max)(0, l-1);
    multiplier = (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1;
    shift1 = log_div > 1 ? 1 : log_div;
    shift2 = log_div > 1 ? log_div-1 : 0;
  }

  // Must have 0 <= numerator <= 2^32-1
  T divide(const T numerator) const {
    static const int N = 32;
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const {
    const int N = 32;
    eigen_assert(numerator >= 0);
    eigen_assert(numerator <= (1ull<<N) - 1);

@@ -71,7 +75,7 @@ struct TensorIntDivisor {


template <typename T>
static T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
  return divisor.divide(numerator);
}
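Besides renaming l to log_div, the change swaps (std::min)/(std::max) for ternaries, presumably because std::min/std::max are not callable from device code now that the constructor is an EIGEN_DEVICE_FUNC. The computed shifts are identical for every value log_div can take. A quick standalone check of that equivalence (not Eigen code):

#include <algorithm>
#include <cassert>

int main() {
  for (int log_div = 0; log_div < 32; ++log_div) {   // 32-bit divider => log_div in [0, 31]
    assert(((std::min)(1, log_div))     == (log_div > 1 ? 1 : log_div));
    assert(((std::max)(0, log_div - 1)) == (log_div > 1 ? log_div - 1 : 0));
  }
  return 0;
}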
@ -42,26 +42,25 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
|
||||
|
||||
static const int Options = Options_;
|
||||
|
||||
static const std::size_t NumIndices = PlainObjectType::NumIndices;
|
||||
static const Index NumIndices = PlainObjectType::NumIndices;
|
||||
typedef typename PlainObjectType::Dimensions Dimensions;
|
||||
|
||||
|
||||
enum {
|
||||
IsAligned = bool(EIGEN_ALIGN) && ((int(Options_)&Aligned)==Aligned),
|
||||
PacketAccess = true,
|
||||
IsAligned = ((int(Options_)&Aligned)==Aligned),
|
||||
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
|
||||
};
|
||||
|
||||
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
|
||||
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(array<DenseIndex, NumIndices>({{firstDimension, otherDimensions...}})) {
|
||||
EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) {
|
||||
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
|
||||
EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
#else
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(array<DenseIndex, NumIndices>(firstDimension)) {
|
||||
EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) {
|
||||
// The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
|
||||
EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -176,12 +175,13 @@ template<typename PlainObjectType, int Options_> class TensorMap : public Tensor
|
||||
template<typename... IndexTypes> EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices)
|
||||
{
|
||||
static_assert(sizeof...(otherIndices) + 1 == NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
|
||||
static_assert(sizeof...(otherIndices) + 1 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
|
||||
const std::size_t NumDims = sizeof...(otherIndices) + 1;
|
||||
if (PlainObjectType::Options&RowMajor) {
|
||||
const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumIndices>{{firstIndex, otherIndices...}});
|
||||
const Index index = m_dimensions.IndexOfRowMajor(array<Index, NumDims>{{firstIndex, otherIndices...}});
|
||||
return m_data[index];
|
||||
} else {
|
||||
const Index index = m_dimensions.IndexOfColMajor(array<Index, NumIndices>{{firstIndex, otherIndices...}});
|
||||
const Index index = m_dimensions.IndexOfColMajor(array<Index, NumDims>{{firstIndex, otherIndices...}});
|
||||
return m_data[index];
|
||||
}
|
||||
}
|
||||
|
@ -144,7 +144,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
|
||||
|
||||
@ -206,7 +206,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
|
||||
{
|
||||
static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
|
||||
for (int i = 0; i < packetSize; ++i) {
|
||||
values[i] = coeff(index+i);
|
||||
|
@ -97,7 +97,7 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
|
||||
enum {
|
||||
IsAligned = true,
|
||||
IsAligned = false,
|
||||
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
|
||||
};
|
||||
|
||||
@ -194,7 +194,7 @@ struct TensorEvaluator<TensorShufflingOp<Shuffle, ArgType>, Device>
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
|
||||
enum {
|
||||
IsAligned = true,
|
||||
IsAligned = false,
|
||||
PacketAccess = (internal::packet_traits<Scalar>::size > 1),
|
||||
};
|
||||
|
||||
|
@ -30,11 +30,11 @@ namespace Eigen {
|
||||
*
|
||||
* \sa Tensor
|
||||
*/
|
||||
template<typename T, std::size_t NumIndices_, DenseIndex Size, int Options_, typename Dimensions = void> class TensorStorage;
|
||||
template<typename T, DenseIndex NumIndices_, DenseIndex Size, int Options_, typename Dimensions = void> class TensorStorage;
|
||||
|
||||
|
||||
// Pure fixed-size storage
|
||||
template<typename T, std::size_t NumIndices_, DenseIndex Size, int Options_, typename FixedDimensions>
|
||||
template<typename T, DenseIndex NumIndices_, DenseIndex Size, int Options_, typename FixedDimensions>
|
||||
class TensorStorage
|
||||
{
|
||||
private:
|
||||
@ -62,7 +62,7 @@ class TensorStorage
|
||||
|
||||
|
||||
// pure-dynamic, but without specification of all dimensions explicitly
|
||||
template<typename T, std::size_t NumIndices_, int Options_>
|
||||
template<typename T, DenseIndex NumIndices_, int Options_>
|
||||
class TensorStorage<T, NumIndices_, Dynamic, Options_, void>
|
||||
: public TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type>
|
||||
{
|
||||
@ -79,7 +79,7 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, void>
|
||||
};
|
||||
|
||||
// pure dynamic
|
||||
template<typename T, std::size_t NumIndices_, int Options_>
|
||||
template<typename T, DenseIndex NumIndices_, int Options_>
|
||||
class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_numeric_list_repeated<DenseIndex, NumIndices_, Dynamic>::type>
|
||||
{
|
||||
T *m_data;
|
||||
@ -140,6 +140,7 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_nu
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_CXX11_TENSOR_TENSORSTORAGE_H
|
||||
|
@ -48,7 +48,7 @@ struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridin
|
||||
|
||||
|
||||
template<typename Strides, typename XprType>
|
||||
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType>, WriteAccessors>
|
||||
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
|
||||
{
|
||||
public:
|
||||
typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
|
||||
@ -97,7 +97,7 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
||||
|
||||
enum {
|
||||
IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
|
||||
PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
};
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
|
||||
@ -109,28 +109,23 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
||||
}
|
||||
|
||||
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
|
||||
for (int i = 0; i < NumDims; ++i) {
|
||||
if (i > 0) {
|
||||
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
|
||||
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
|
||||
} else {
|
||||
m_inputStrides[0] = 1;
|
||||
m_outputStrides[0] = 1;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < NumDims; ++i) {
|
||||
m_inputStrides[i] *= op.strides()[i];
|
||||
m_outputStrides[0] = 1;
|
||||
m_inputStrides[0] = 1;
|
||||
for (int i = 1; i < NumDims; ++i) {
|
||||
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
|
||||
m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
|
||||
m_inputStrides[i-1] *= op.strides()[i-1];
|
||||
}
|
||||
m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
|
||||
}
|
||||
|
||||
// typedef typename XprType::Index Index;
|
||||
typedef typename XprType::Scalar Scalar;
|
||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) {
|
||||
m_impl.evalSubExprsIfNeeded(NULL);
|
||||
return true;
|
||||
}
|
||||
@ -150,16 +145,44 @@ struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
|
||||
return m_impl.coeff(inputIndex);
|
||||
}
|
||||
|
||||
/* template<int LoadMode>
|
||||
template<int LoadMode>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
|
||||
{
|
||||
return m_impl.template packet<LoadMode>(index);
|
||||
}*/
|
||||
const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
eigen_assert(index+packetSize-1 < dimensions().TotalSize());
|
||||
|
||||
Index inputIndices[] = {0, 0};
|
||||
Index indices[] = {index, index + packetSize - 1};
|
||||
for (int i = NumDims - 1; i > 0; --i) {
|
||||
const Index idx0 = indices[0] / m_outputStrides[i];
|
||||
const Index idx1 = indices[1] / m_outputStrides[i];
|
||||
inputIndices[0] += idx0 * m_inputStrides[i];
|
||||
inputIndices[1] += idx1 * m_inputStrides[i];
|
||||
indices[0] -= idx0 * m_outputStrides[i];
|
||||
indices[1] -= idx1 * m_outputStrides[i];
|
||||
}
|
||||
inputIndices[0] += indices[0] * m_inputStrides[0];
|
||||
inputIndices[1] += indices[1] * m_inputStrides[0];
|
||||
if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
|
||||
PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
|
||||
return rslt;
|
||||
}
|
||||
else {
|
||||
EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
|
||||
values[0] = m_impl.coeff(inputIndices[0]);
|
||||
values[packetSize-1] = m_impl.coeff(inputIndices[1]);
|
||||
for (int i = 1; i < packetSize-1; ++i) {
|
||||
values[i] = coeff(index+i);
|
||||
}
|
||||
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
|
||||
return rslt;
|
||||
}
|
||||
}
|
||||
|
||||
Scalar* data() const { return NULL; }
|
||||
|
||||
protected:
|
||||
// Strides m_strides;
|
||||
Dimensions m_dimensions;
|
||||
array<Index, NumDims> m_outputStrides;
|
||||
array<Index, NumDims> m_inputStrides;
|
||||
|
@ -70,14 +70,18 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_> >
|
||||
};
|
||||
|
||||
|
||||
template<typename PlainObjectType>
|
||||
struct traits<TensorMap<PlainObjectType> >
|
||||
template<typename PlainObjectType, int Options_>
|
||||
struct traits<TensorMap<PlainObjectType, Options_> >
|
||||
: public traits<PlainObjectType>
|
||||
{
|
||||
typedef traits<PlainObjectType> BaseTraits;
|
||||
typedef typename BaseTraits::Scalar Scalar;
|
||||
typedef typename BaseTraits::StorageKind StorageKind;
|
||||
typedef typename BaseTraits::Index Index;
|
||||
enum {
|
||||
Options = Options_,
|
||||
Flags = ((BaseTraits::Flags | LvalueBit) & ~AlignedBit) | (Options&Aligned ? AlignedBit : 0),
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@ -105,16 +109,16 @@ struct eval<const TensorFixedSize<Scalar_, Dimensions, Options>, Eigen::Dense>
|
||||
typedef const TensorFixedSize<Scalar_, Dimensions, Options>& type;
|
||||
};
|
||||
|
||||
template<typename PlainObjectType>
|
||||
struct eval<TensorMap<PlainObjectType>, Eigen::Dense>
|
||||
template<typename PlainObjectType, int Options>
|
||||
struct eval<TensorMap<PlainObjectType, Options>, Eigen::Dense>
|
||||
{
|
||||
typedef const TensorMap<PlainObjectType>& type;
|
||||
typedef const TensorMap<PlainObjectType, Options>& type;
|
||||
};
|
||||
|
||||
template<typename PlainObjectType>
|
||||
struct eval<const TensorMap<PlainObjectType>, Eigen::Dense>
|
||||
template<typename PlainObjectType, int Options>
|
||||
struct eval<const TensorMap<PlainObjectType, Options>, Eigen::Dense>
|
||||
{
|
||||
typedef const TensorMap<PlainObjectType>& type;
|
||||
typedef const TensorMap<PlainObjectType, Options>& type;
|
||||
};
|
||||
|
||||
template <typename Scalar_, std::size_t NumIndices_, int Options_>
|
||||
@ -141,16 +145,16 @@ struct nested<const TensorFixedSize<Scalar_, Dimensions, Options>, 1, typename e
|
||||
typedef const TensorFixedSize<Scalar_, Dimensions, Options>& type;
|
||||
};
|
||||
|
||||
template <typename PlainObjectType>
|
||||
struct nested<TensorMap<PlainObjectType>, 1, typename eval<TensorMap<PlainObjectType> >::type>
|
||||
template <typename PlainObjectType, int Options>
|
||||
struct nested<TensorMap<PlainObjectType, Options>, 1, typename eval<TensorMap<PlainObjectType, Options> >::type>
|
||||
{
|
||||
typedef const TensorMap<PlainObjectType>& type;
|
||||
typedef const TensorMap<PlainObjectType, Options>& type;
|
||||
};
|
||||
|
||||
template <typename PlainObjectType>
|
||||
struct nested<const TensorMap<PlainObjectType>, 1, typename eval<TensorMap<PlainObjectType> >::type>
|
||||
template <typename PlainObjectType, int Options>
|
||||
struct nested<const TensorMap<PlainObjectType, Options>, 1, typename eval<TensorMap<PlainObjectType, Options> >::type>
|
||||
{
|
||||
typedef const TensorMap<PlainObjectType>& type;
|
||||
typedef const TensorMap<PlainObjectType, Options>& type;
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
@ -110,6 +110,7 @@ if(EIGEN_TEST_CXX11)
|
||||
# ei_add_test(cxx11_tensor_fixed_size "-std=c++0x")
|
||||
ei_add_test(cxx11_tensor_const "-std=c++0x")
|
||||
ei_add_test(cxx11_tensor_of_const_values "-std=c++0x")
|
||||
ei_add_test(cxx11_tensor_of_complex "-std=c++0x")
|
||||
ei_add_test(cxx11_tensor_of_strings "-std=c++0x")
|
||||
ei_add_test(cxx11_tensor_intdiv "-std=c++0x")
|
||||
ei_add_test(cxx11_tensor_lvalue "-std=c++0x")
|
||||
|
@ -253,6 +253,39 @@ static void test_auto_resize()
|
||||
}
|
||||
|
||||
|
||||
static void test_compound_assign()
|
||||
{
|
||||
Tensor<int, 1> start_tensor(10);
|
||||
Tensor<int, 1> offset_tensor(10);
|
||||
start_tensor.setRandom();
|
||||
offset_tensor.setRandom();
|
||||
|
||||
Tensor<int, 1> tensor = start_tensor;
|
||||
tensor += offset_tensor;
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
VERIFY_IS_EQUAL(tensor(i), start_tensor(i) + offset_tensor(i));
|
||||
}
|
||||
|
||||
tensor = start_tensor;
|
||||
tensor -= offset_tensor;
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
VERIFY_IS_EQUAL(tensor(i), start_tensor(i) - offset_tensor(i));
|
||||
}
|
||||
|
||||
tensor = start_tensor;
|
||||
tensor *= offset_tensor;
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
VERIFY_IS_EQUAL(tensor(i), start_tensor(i) * offset_tensor(i));
|
||||
}
|
||||
|
||||
tensor = start_tensor;
|
||||
tensor /= offset_tensor;
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
VERIFY_IS_EQUAL(tensor(i), start_tensor(i) / offset_tensor(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void test_cxx11_tensor_assign()
|
||||
{
|
||||
CALL_SUBTEST(test_1d());
|
||||
@ -260,5 +293,5 @@ void test_cxx11_tensor_assign()
|
||||
CALL_SUBTEST(test_3d());
|
||||
CALL_SUBTEST(test_same_type());
|
||||
CALL_SUBTEST(test_auto_resize());
|
||||
|
||||
CALL_SUBTEST(test_compound_assign());
|
||||
}
|
||||
|
@ -64,8 +64,78 @@ static void test_expr()
|
||||
}
|
||||
|
||||
|
||||
static void test_modes() {
|
||||
Tensor<float, 1> input(3);
|
||||
Tensor<float, 1> kernel(3);
|
||||
input(0) = 1.0f;
|
||||
input(1) = 2.0f;
|
||||
input(2) = 3.0f;
|
||||
kernel(0) = 0.5f;
|
||||
kernel(1) = 1.0f;
|
||||
kernel(2) = 0.0f;
|
||||
|
||||
const Eigen::array<ptrdiff_t, 1> dims{{0}};
|
||||
Eigen::array<std::pair<ptrdiff_t, ptrdiff_t>, 1> padding;
|
||||
|
||||
// Emulate VALID mode (as defined in
|
||||
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
|
||||
padding[0] = std::make_pair(0, 0);
|
||||
Tensor<float, 1> valid(1);
|
||||
valid = input.pad(padding).convolve(kernel, dims);
|
||||
VERIFY_IS_EQUAL(valid.dimension(0), 1);
|
||||
VERIFY_IS_APPROX(valid(0), 2.5f);
|
||||
|
||||
// Emulate SAME mode (as defined in
|
||||
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
|
||||
padding[0] = std::make_pair(1, 1);
|
||||
Tensor<float, 1> same(3);
|
||||
same = input.pad(padding).convolve(kernel, dims);
|
||||
VERIFY_IS_EQUAL(same.dimension(0), 3);
|
||||
VERIFY_IS_APPROX(same(0), 1.0f);
|
||||
VERIFY_IS_APPROX(same(1), 2.5f);
|
||||
VERIFY_IS_APPROX(same(2), 4.0f);
|
||||
|
||||
// Emulate FULL mode (as defined in
|
||||
// http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
|
||||
padding[0] = std::make_pair(2, 2);
|
||||
Tensor<float, 1> full(5);
|
||||
full = input.pad(padding).convolve(kernel, dims);
|
||||
VERIFY_IS_EQUAL(full.dimension(0), 5);
|
||||
VERIFY_IS_APPROX(full(0), 0.0f);
|
||||
VERIFY_IS_APPROX(full(1), 1.0f);
|
||||
VERIFY_IS_APPROX(full(2), 2.5f);
|
||||
VERIFY_IS_APPROX(full(3), 4.0f);
|
||||
VERIFY_IS_APPROX(full(4), 1.5f);
|
||||
}
|
||||
|
||||
|
||||
static void test_strides() {
|
||||
Tensor<float, 1> input(13);
|
||||
Tensor<float, 1> kernel(3);
|
||||
input.setRandom();
|
||||
kernel.setRandom();
|
||||
|
||||
const Eigen::array<ptrdiff_t, 1> dims{{0}};
|
||||
const Eigen::array<ptrdiff_t, 1> stride_of_3{{3}};
|
||||
const Eigen::array<ptrdiff_t, 1> stride_of_2{{2}};
|
||||
|
||||
Tensor<float, 1> result;
|
||||
result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2);
|
||||
|
||||
VERIFY_IS_EQUAL(result.dimension(0), 2);
|
||||
VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) +
|
||||
input(6)*kernel(2)));
|
||||
VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) +
|
||||
input(12)*kernel(2)));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void test_cxx11_tensor_convolution()
|
||||
{
|
||||
CALL_SUBTEST(test_evals());
|
||||
CALL_SUBTEST(test_expr());
|
||||
CALL_SUBTEST(test_modes());
|
||||
CALL_SUBTEST(test_strides());
|
||||
}
|
||||
|
@ -123,6 +123,14 @@ static void test_forced_contextual_eval(Context* context)
|
||||
context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f);
|
||||
}
|
||||
|
||||
template <typename Context>
|
||||
static void test_compound_assignment(Context* context)
|
||||
{
|
||||
context->out().device(context->device()) = context->in1().constant(2.718f);
|
||||
context->out().device(context->device()) += context->in1() + context->in2() * 3.14f;
|
||||
}
|
||||
|
||||
|
||||
template <typename Context>
|
||||
static void test_contraction(Context* context)
|
||||
{
|
||||
@ -197,6 +205,15 @@ static void test_cpu() {
|
||||
}
|
||||
}
|
||||
|
||||
test_compound_assignment(&context);
|
||||
for (int i = 0; i < 40; ++i) {
|
||||
for (int j = 0; j < 50; ++j) {
|
||||
for (int k = 0; k < 70; ++k) {
|
||||
VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f + 2.718f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test_contraction(&context);
|
||||
for (int i = 0; i < 40; ++i) {
|
||||
for (int j = 0; j < 40; ++j) {
|
||||
@ -299,6 +316,16 @@ static void test_gpu() {
|
||||
}
|
||||
}
|
||||
|
||||
test_compound_assignment(&context);
|
||||
assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess);
|
||||
for (int i = 0; i < 40; ++i) {
|
||||
for (int j = 0; j < 50; ++j) {
|
||||
for (int k = 0; k < 70; ++k) {
|
||||
VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f + 2.718f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test_contraction(&context);
|
||||
assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess);
|
||||
for (int i = 0; i < 40; ++i) {
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <Eigen/CXX11/Tensor>
|
||||
|
||||
using Eigen::Tensor;
|
||||
using Eigen::IndexPair;
|
||||
|
||||
static void test_simple_reshape()
|
||||
{
|
||||
@ -52,7 +53,7 @@ static void test_reshape_in_expr() {
|
||||
TensorMap<Tensor<float, 5>> tensor2(m2.data(), 3,5,7,11,13);
|
||||
Tensor<float, 2>::Dimensions newDims1{{2,3*5*7*11}};
|
||||
Tensor<float, 2>::Dimensions newDims2{{3*5*7*11,13}};
|
||||
array<Tensor<float, 1>::DimensionPair, 1> contract_along{{std::make_pair(1, 0)}};
|
||||
Eigen::array<IndexPair<DenseIndex>, 1> contract_along{{IndexPair<DenseIndex>(1, 0)}};
|
||||
Tensor<float, 2> tensor3(2,13);
|
||||
tensor3 = tensor1.reshape(newDims1).contract(tensor2.reshape(newDims2), contract_along);
|
||||
|
||||
@ -125,7 +126,7 @@ static void test_slice_in_expr() {
|
||||
TensorMap<Tensor<float, 2>> tensor1(m1.data(), 7, 7);
|
||||
TensorMap<Tensor<float, 2>> tensor2(m2.data(), 3, 3);
|
||||
Tensor<float, 2> tensor3(3,1);
|
||||
array<Tensor<float, 1>::DimensionPair, 1> contract_along{{std::make_pair(1, 0)}};
|
||||
array<IndexPair<DenseIndex>, 1> contract_along{{IndexPair<DenseIndex>(1, 0)}};
|
||||
|
||||
Eigen::DSizes<ptrdiff_t, 2> indices1(1,2);
|
||||
Eigen::DSizes<ptrdiff_t, 2> sizes1(3,3);
|
||||
|
unsupported/test/cxx11_tensor_of_complex.cpp — new file (64 lines)
@ -0,0 +1,64 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
#include "main.h"
|
||||
|
||||
#include <Eigen/CXX11/Tensor>
|
||||
|
||||
using Eigen::Tensor;
|
||||
using Eigen::TensorMap;
|
||||
|
||||
|
||||
|
||||
static void test_additions()
|
||||
{
|
||||
Tensor<std::complex<float>, 1> data1(3);
|
||||
Tensor<std::complex<float>, 1> data2(3);
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
data1(i) = std::complex<float>(i, -i);
|
||||
data2(i) = std::complex<float>(i, 7 * i);
|
||||
}
|
||||
|
||||
Tensor<std::complex<float>, 1> sum = data1 + data2;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
VERIFY_IS_EQUAL(sum(i), std::complex<float>(2*i, 6*i));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void test_contractions()
|
||||
{
|
||||
Tensor<std::complex<float>, 4> t_left(30, 50, 8, 31);
|
||||
Tensor<std::complex<float>, 5> t_right(8, 31, 7, 20, 10);
|
||||
Tensor<std::complex<float>, 5> t_result(30, 50, 7, 20, 10);
|
||||
|
||||
t_left.setRandom();
|
||||
t_right.setRandom();
|
||||
|
||||
typedef Map<Matrix<std::complex<float>, Dynamic, Dynamic>> MapXcf;
|
||||
MapXcf m_left(t_left.data(), 1500, 248);
|
||||
MapXcf m_right(t_right.data(), 248, 1400);
|
||||
Matrix<std::complex<float>, Dynamic, Dynamic> m_result(1500, 1400);
|
||||
|
||||
// This contraction should be equivalent to a regular matrix multiplication
|
||||
typedef Tensor<float, 1>::DimensionPair DimPair;
|
||||
Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});
|
||||
t_result = t_left.contract(t_right, dims);
|
||||
m_result = m_left * m_right;
|
||||
for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
|
||||
VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void test_cxx11_tensor_of_complex()
|
||||
{
|
||||
CALL_SUBTEST(test_additions());
|
||||
CALL_SUBTEST(test_contractions());
|
||||
}
|
@ -9,22 +9,23 @@
|
||||
|
||||
#define EIGEN_USE_THREADS
|
||||
|
||||
|
||||
#include <iostream>
|
||||
#include "main.h"
|
||||
#include <Eigen/CXX11/Tensor>
|
||||
|
||||
|
||||
using Eigen::Tensor;
|
||||
|
||||
void test_cxx11_tensor_thread_pool()
|
||||
static void test_multithread_elementwise()
|
||||
{
|
||||
Eigen::Tensor<float, 3> in1(2,3,7);
|
||||
Eigen::Tensor<float, 3> in2(2,3,7);
|
||||
Eigen::Tensor<float, 3> out(2,3,7);
|
||||
Tensor<float, 3> in1(2,3,7);
|
||||
Tensor<float, 3> in2(2,3,7);
|
||||
Tensor<float, 3> out(2,3,7);
|
||||
|
||||
in1.setRandom();
|
||||
in2.setRandom();
|
||||
|
||||
Eigen::ThreadPoolDevice thread_pool_device(3);
|
||||
Eigen::ThreadPoolDevice thread_pool_device(internal::random<int>(3, 11));
|
||||
out.device(thread_pool_device) = in1 + in2 * 3.14f;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
@ -35,3 +36,222 @@ void test_cxx11_tensor_thread_pool()
|
||||
}
|
||||
}
|
||||
}

static void test_multithread_compound_assignment()
{
  Tensor<float, 3> in1(2,3,7);
  Tensor<float, 3> in2(2,3,7);
  Tensor<float, 3> out(2,3,7);

  in1.setRandom();
  in2.setRandom();

  Eigen::ThreadPoolDevice thread_pool_device(internal::random<int>(3, 11));
  out.device(thread_pool_device) = in1;
  out.device(thread_pool_device) += in2 * 3.14f;

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f);
      }
    }
  }
}


static void test_multithread_contraction()
{
  Tensor<float, 4> t_left(30, 50, 37, 31);
  Tensor<float, 5> t_right(37, 31, 70, 2, 10);
  Tensor<float, 5> t_result(30, 50, 70, 2, 10);

  t_left.setRandom();
  t_right.setRandom();

  // this contraction should be equivalent to a single matrix multiplication
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}});

  typedef Map<MatrixXf> MapXf;
  MapXf m_left(t_left.data(), 1500, 1147);
  MapXf m_right(t_right.data(), 1147, 1400);
  MatrixXf m_result(1500, 1400);

  Eigen::ThreadPoolDevice thread_pool_device(4);

  // compute results by separate methods
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  m_result = m_left * m_right;

  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    VERIFY(&t_result.data()[i] != &m_result.data()[i]);
    if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }
}


static void test_contraction_corner_cases()
{
  Tensor<float, 2> t_left(32, 500);
  Tensor<float, 2> t_right(32, 28*28);
  Tensor<float, 2> t_result(500, 28*28);

  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result = t_result.constant(NAN);
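  // Pre-fill the result with NaNs so that any coefficient the contraction
  // kernel fails to write is caught by the isnan asserts below.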

  // this contraction should be equivalent to a single matrix multiplication
  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}};
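  // Contracting dimension 0 of both tensors (the size-32 dimension) is the
  // same as computing m_left.transpose() * m_right, which is the reference
  // used below.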

  typedef Map<MatrixXf> MapXf;
  MapXf m_left(t_left.data(), 32, 500);
  MapXf m_right(t_right.data(), 32, 28*28);
  MatrixXf m_result(500, 28*28);

  Eigen::ThreadPoolDevice thread_pool_device(12);

  // compute results by separate methods
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  m_result = m_left.transpose() * m_right;

  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!isnan(t_result.data()[i]));
    if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
      std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 1);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_result.resize(1, 28*28);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 1);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!isnan(t_result.data()[i]));
    if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 500);
  t_right.resize(32, 4);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result.resize(500, 4);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 500);
  new(&m_right) MapXf(t_right.data(), 32, 4);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!isnan(t_result.data()[i]));
    if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }

  t_left.resize(32, 1);
  t_right.resize(32, 4);
  t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f;
  t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f;
  t_result.resize(1, 4);
  t_result = t_result.constant(NAN);
  t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
  new(&m_left) MapXf(t_left.data(), 32, 1);
  new(&m_right) MapXf(t_right.data(), 32, 4);
  m_result = m_left.transpose() * m_right;
  for (ptrdiff_t i = 0; i < t_result.size(); i++) {
    assert(!isnan(t_result.data()[i]));
    if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) {
      std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
      assert(false);
    }
  }
}
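
// The placement-new calls in the test above re-seat an existing Map onto new
// data and dimensions without introducing another named variable; this is the
// usual Eigen idiom for changing what a Map refers to. A minimal sketch of the
// pattern, using hypothetical local buffers:
static void map_rebinding_sketch()
{
  float buffer_a[8] = {0};
  float buffer_b[8] = {0};
  Map<MatrixXf> m(buffer_a, 2, 4);         // m views buffer_a as a 2x4 matrix
  new (&m) Map<MatrixXf>(buffer_b, 4, 2);  // m now views buffer_b as a 4x2 matrix
  VERIFY_IS_EQUAL(m.rows(), 4);
  VERIFY_IS_EQUAL(m.cols(), 2);
}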

static void test_multithread_contraction_agrees_with_singlethread() {
  int contract_size = internal::random<int>(1, 5000);

  Tensor<float, 3> left(internal::random<int>(1, 80),
                        contract_size,
                        internal::random<int>(1, 100));

  Tensor<float, 4> right(internal::random<int>(1, 25),
                         internal::random<int>(1, 37),
                         contract_size,
                         internal::random<int>(1, 51));

  left.setRandom();
  right.setRandom();

  // add constants to shift values away from 0 for more precision
  left += left.constant(1.5f);
  right += right.constant(1.5f);

  typedef Tensor<float, 1>::DimensionPair DimPair;
  Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}});
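  // DimPair(1, 2) contracts dimension 1 of 'left' with dimension 2 of 'right';
  // both were sized to contract_size above.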

  Eigen::ThreadPoolDevice thread_pool_device(internal::random<int>(2, 11));

  Tensor<float, 5> st_result;
  st_result = left.contract(right, dims);

  Tensor<float, 5> tp_result(st_result.dimensions());
  tp_result.device(thread_pool_device) = left.contract(right, dims);

  VERIFY(internal::dimensions_match(st_result.dimensions(), tp_result.dimensions()));
  for (ptrdiff_t i = 0; i < st_result.size(); i++) {
    // if both of the values are very small, then do nothing (because the test
    // will fail due to numerical precision issues when values are small)
    if (fabs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4) {
      VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
    }
  }
}


static void test_memcpy() {

  for (int i = 0; i < 5; ++i) {
    const int num_threads = internal::random<int>(3, 11);
    Eigen::ThreadPoolDevice thread_pool_device(num_threads);

    const int size = internal::random<int>(13, 7632);
    Tensor<float, 1> t1(size);
    t1.setRandom();
    std::vector<float> result(size);
    thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float));
    for (int j = 0; j < size; j++) {
      VERIFY_IS_EQUAL(t1(j), result[j]);
    }
  }
}


void test_cxx11_tensor_thread_pool()
{
  CALL_SUBTEST(test_multithread_elementwise());
  CALL_SUBTEST(test_multithread_compound_assignment());

  CALL_SUBTEST(test_multithread_contraction());

  CALL_SUBTEST(test_multithread_contraction_agrees_with_singlethread());

  // Exercise various cases that have been problematic in the past.
  CALL_SUBTEST(test_contraction_corner_cases());

  CALL_SUBTEST(test_memcpy());
}