mirror of https://gitlab.com/libeigen/eigen.git
Vectorized the evaluation of tensor expressions (using SSE, AVX, NEON, ...).
Added the ability to parallelize the evaluation of a tensor expression across multiple CPU cores.
Added the ability to offload the evaluation of a tensor expression to a GPU.
parent 0320f7e3a7
commit 7402fea0a8
unsupported/Eigen/CXX11/Tensor
@@ -31,6 +31,7 @@
 #include "Eigen/Core"

 #include "unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
@@ -39,6 +40,7 @@
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h"
+#include "unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h"

 #include "unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h"
 #include "unsupported/Eigen/CXX11/src/Tensor/Tensor.h"
unsupported/Eigen/CXX11/src/Tensor/Tensor.h
@@ -75,9 +75,15 @@ class Tensor : public TensorBase<Tensor<Scalar_, NumIndices_, Options_> >
     typedef typename internal::traits<Self>::StorageKind StorageKind;
     typedef typename internal::traits<Self>::Index Index;
     typedef Scalar_ Scalar;
     typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;
+    typedef typename Base::PacketReturnType PacketReturnType;
+
+    enum {
+      IsAligned = bool(EIGEN_ALIGN),
+      PacketAccess = true,
+    };

     static const int Options = Options_;
     static const std::size_t NumIndices = NumIndices_;
unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h
@@ -10,6 +10,9 @@
 #ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
 #define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H

+#ifdef EIGEN_USE_THREADS
+#include <future>
+#endif
+
 namespace Eigen {
@@ -28,7 +31,8 @@
  */
 namespace internal {

-template<typename Derived1, typename Derived2>
+// Default strategy: the expressions are evaluated with a single cpu thread.
+template<typename Derived1, typename Derived2, bool Vectorizable = TensorEvaluator<Derived1>::PacketAccess & TensorEvaluator<Derived2>::PacketAccess>
 struct TensorAssign
 {
   typedef typename Derived1::Index Index;
@@ -38,13 +42,150 @@ struct TensorAssign
     TensorEvaluator<Derived1> evalDst(dst);
     TensorEvaluator<Derived2> evalSrc(src);
     const Index size = dst.size();
-    for(Index i = 0; i < size; ++i) {
+    for (Index i = 0; i < size; ++i) {
       evalDst.coeffRef(i) = evalSrc.coeff(i);
     }
   }
 };


+template<typename Derived1, typename Derived2>
+struct TensorAssign<Derived1, Derived2, true>
+{
+  typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
+  static inline void run(Derived1& dst, const Derived2& src)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+
+    static const int LhsStoreMode = TensorEvaluator<Derived1>::IsAligned ? Aligned : Unaligned;
+    static const int RhsLoadMode = TensorEvaluator<Derived2>::IsAligned ? Aligned : Unaligned;
+    static const int PacketSize = unpacket_traits<typename TensorEvaluator<Derived1>::PacketReturnType>::size;
+    const Index VectorizedSize = (size / PacketSize) * PacketSize;
+
+    for (Index i = 0; i < VectorizedSize; i += PacketSize) {
+      evalDst.template writePacket<LhsStoreMode>(i, evalSrc.template packet<RhsLoadMode>(i));
+    }
+    for (Index i = VectorizedSize; i < size; ++i) {
+      evalDst.coeffRef(i) = evalSrc.coeff(i);
+    }
+  }
+};
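The specialization above is the heart of the vectorization: a main loop that strides by PacketSize and moves one SIMD register per iteration through packet/writePacket, followed by a scalar tail loop for the remaining size % PacketSize coefficients. For instance, with size = 10 and a 4-wide packet (SSE floats), VectorizedSize = 8: coefficients 0-7 move as two packets and 8-9 fall through to the scalar loop. The same peeling pattern as a minimal standalone sketch (plain C++, hypothetical function, not Eigen's internal API):

    // Sketch: copy `size` floats in SIMD-width chunks, then finish scalar.
    // kPacketSize stands in for unpacket_traits<Packet>::size (4 for SSE floats).
    void assign_peeled(float* dst, const float* src, int size) {
      const int kPacketSize = 4;
      const int vectorized = (size / kPacketSize) * kPacketSize;
      for (int i = 0; i < vectorized; i += kPacketSize) {
        // stands in for evalDst.writePacket(i, evalSrc.packet(i))
        for (int j = 0; j < kPacketSize; ++j) dst[i + j] = src[i + j];
      }
      for (int i = vectorized; i < size; ++i) {  // scalar tail
        dst[i] = src[i];
      }
    }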
+// Multicore strategy: the index space is partitioned and each core is assigned to a partition
+#ifdef EIGEN_USE_THREADS
+template <typename LhsEval, typename RhsEval, typename Index, bool Vectorizable = LhsEval::PacketAccess & RhsEval::PacketAccess>
+struct EvalRange {
+  static void run(LhsEval& dst, const RhsEval& src, const Index first, const Index last) {
+    eigen_assert(last > first);
+    for (Index i = first; i < last; ++i) {
+      dst.coeffRef(i) = src.coeff(i);
+    }
+  }
+};
+
+template <typename LhsEval, typename RhsEval, typename Index>
+struct EvalRange<LhsEval, RhsEval, Index, true> {
+  static void run(LhsEval& dst, const RhsEval& src, const Index first, const Index last) {
+    eigen_assert(last > first);
+
+    Index i = first;
+    static const int PacketSize = unpacket_traits<typename LhsEval::PacketReturnType>::size;
+    if (last - first > PacketSize) {
+      static const int LhsStoreMode = LhsEval::IsAligned ? Aligned : Unaligned;
+      static const int RhsLoadMode = RhsEval::IsAligned ? Aligned : Unaligned;
+      eigen_assert(first % PacketSize == 0);
+      Index lastPacket = last - (last % PacketSize);
+      for (; i < lastPacket; i += PacketSize) {
+        dst.template writePacket<LhsStoreMode>(i, src.template packet<RhsLoadMode>(i));
+      }
+    }
+
+    for (; i < last; ++i) {
+      dst.coeffRef(i) = src.coeff(i);
+    }
+  }
+};
+
+template<typename Derived1, typename Derived2>
+struct TensorAssignMultiThreaded
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1& dst, const Derived2& src, const ThreadPoolDevice& device)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+
+    static const bool Vectorizable = TensorEvaluator<Derived1>::PacketAccess & TensorEvaluator<Derived2>::PacketAccess;
+    static const int PacketSize = Vectorizable ? unpacket_traits<typename TensorEvaluator<Derived1>::PacketReturnType>::size : 1;
+
+    int blocksz = static_cast<int>(ceil(static_cast<float>(size)/device.numThreads()) + PacketSize - 1);
+    const Index blocksize = std::max<Index>(PacketSize, (blocksz - (blocksz % PacketSize)));
+    const Index numblocks = size / blocksize;
+
+    std::vector<std::future<void> > results;
+    results.reserve(numblocks);
+    for (int i = 0; i < numblocks; ++i) {
+      results.push_back(std::async(std::launch::async, &EvalRange<TensorEvaluator<Derived1>, TensorEvaluator<Derived2>, Index>::run, evalDst, evalSrc, i*blocksize, (i+1)*blocksize));
+    }
+
+    for (int i = 0; i < numblocks; ++i) {
+      results[i].get();
+    }
+
+    if (numblocks * blocksize < size) {
+      EvalRange<TensorEvaluator<Derived1>, TensorEvaluator<Derived2>, Index>::run(evalDst, evalSrc, numblocks * blocksize, size);
+    }
+  }
+};
+#endif
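To make the partitioning arithmetic concrete, a worked example with assumed values (not from the commit): size = 100, device.numThreads() = 3, PacketSize = 4. Then blocksz = ceil(100/3) + 4 - 1 = 37, blocksize = max(4, 37 - 37 % 4) = 36, and numblocks = 100 / 36 = 2. Two std::async tasks evaluate [0, 36) and [36, 72), and after joining them the calling thread handles the remainder [72, 100) inline. Rounding blocksize down to a multiple of PacketSize is what preserves the first % PacketSize == 0 alignment invariant asserted by the vectorized EvalRange.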
+// GPU: the evaluation of the expressions is offloaded to a GPU.
+#ifdef EIGEN_USE_GPU
+template <typename LhsEvaluator, typename RhsEvaluator>
+__global__ void EigenMetaKernelNoCheck(LhsEvaluator evalDst, const RhsEvaluator evalSrc) {
+  const int index = blockIdx.x * blockDim.x + threadIdx.x;
+  evalDst.coeffRef(index) = evalSrc.coeff(index);
+}
+template <typename LhsEvaluator, typename RhsEvaluator>
+__global__ void EigenMetaKernelPeel(LhsEvaluator evalDst, const RhsEvaluator evalSrc, int peel_start_offset, int size) {
+  const int index = peel_start_offset + blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < size) {
+    evalDst.coeffRef(index) = evalSrc.coeff(index);
+  }
+}
+
+template<typename Derived1, typename Derived2>
+struct TensorAssignGpu
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1& dst, const Derived2& src, const GpuDevice& device)
+  {
+    TensorEvaluator<Derived1> evalDst(dst);
+    TensorEvaluator<Derived2> evalSrc(src);
+    const Index size = dst.size();
+    const int block_size = std::min<int>(size, 32*32);
+    const int num_blocks = size / block_size;
+    EigenMetaKernelNoCheck<TensorEvaluator<Derived1>, TensorEvaluator<Derived2> > <<<num_blocks, block_size, 0, device.stream()>>>(evalDst, evalSrc);
+
+    const int remaining_items = size % block_size;
+    if (remaining_items > 0) {
+      const int peel_start_offset = num_blocks * block_size;
+      const int peel_block_size = std::min<int>(size, 32);
+      const int peel_num_blocks = (remaining_items + peel_block_size - 1) / peel_block_size;
+      EigenMetaKernelPeel<TensorEvaluator<Derived1>, TensorEvaluator<Derived2> > <<<peel_num_blocks, peel_block_size, 0, device.stream()>>>(evalDst, evalSrc, peel_start_offset, size);
+    }
+  }
+};
+#endif
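The launch configuration splits the index space the same way, again with assumed numbers: for size = 5000, block_size = min(5000, 32*32) = 1024 and num_blocks = 5000 / 1024 = 4, so EigenMetaKernelNoCheck covers indices [0, 4096) with no bounds test (every thread is in range by construction). The remaining 5000 % 1024 = 904 coefficients go to EigenMetaKernelPeel with peel_block_size = 32 and peel_num_blocks = (904 + 31) / 32 = 29; its index < size guard makes the final, partially filled block safe. Both kernels are queued on device.stream(), so they execute in order without a host-side synchronization.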
 } // end namespace internal

 } // end namespace Eigen
unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -28,6 +28,7 @@ class TensorBase
     typedef typename internal::traits<Derived>::Scalar Scalar;
     typedef typename internal::traits<Derived>::Index Index;
     typedef Scalar CoeffReturnType;
+    typedef typename internal::packet_traits<Scalar>::type PacketReturnType;

     Derived& setZero() {
       return setConstant(Scalar(0));
@@ -83,6 +84,17 @@ class TensorBase
       return TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
     }

+    template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived>
+    operator-(const OtherDerived& other) const {
+      return TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
+    }
+
+    template <typename DeviceType>
+    TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
+      return TensorDevice<Derived, DeviceType>(device, derived());
+    }
+
   protected:
     template <typename OtherDerived> friend class TensorBase;
     EIGEN_DEVICE_FUNC
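With operator+, the new operator-, and device() in place, TensorBase supports expressions like the following (a sketch; the shapes follow the tests added later in this commit):

    Eigen::Tensor<float, 3> a(Eigen::array<int, 3>(2,3,7));
    Eigen::Tensor<float, 3> b(Eigen::array<int, 3>(2,3,7));
    Eigen::Tensor<float, 3> c(Eigen::array<int, 3>(2,3,7));
    a.setRandom();
    b.setRandom();
    c = a + b;  // builds a TensorCwiseBinaryOp<scalar_sum_op, ...>, evaluated by TensorAssign
    c = a - b;  // the new operator- builds a scalar_difference_op expression the same way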
unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h (new file, 83 lines)
@@ -0,0 +1,83 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H

namespace Eigen {

/** \class TensorDevice
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Pseudo expression providing an operator = that will evaluate its argument
  * on the specified computing 'device' (GPU, thread pool, ...)
  *
  * Example:
  *    C.device(EIGEN_GPU) = A + B;
  *
  * Todo: thread pools.
  * Todo: operator +=, -=, *= and so on.
  */

template <typename ExpressionType, typename DeviceType> class TensorDevice {
  public:
    TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}

    template<typename OtherDerived>
    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
      internal::TensorAssign<ExpressionType, const OtherDerived>::run(m_expression, other);
      return *this;
    }

  protected:
    const DeviceType& m_device;
    ExpressionType& m_expression;
};


#ifdef EIGEN_USE_THREADS
template <typename ExpressionType> class TensorDevice<ExpressionType, ThreadPoolDevice> {
  public:
    TensorDevice(const ThreadPoolDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}

    template<typename OtherDerived>
    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
      internal::TensorAssignMultiThreaded<ExpressionType, const OtherDerived>::run(m_expression, other, m_device);
      return *this;
    }

  protected:
    const ThreadPoolDevice& m_device;
    ExpressionType& m_expression;
};
#endif


#ifdef EIGEN_USE_GPU
template <typename ExpressionType> class TensorDevice<ExpressionType, GpuDevice>
{
  public:
    TensorDevice(const GpuDevice& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}

    template<typename OtherDerived>
    EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
      internal::TensorAssignGpu<ExpressionType, const OtherDerived>::run(m_expression, other, m_device);
      return *this;
    }

  protected:
    const GpuDevice& m_device;
    ExpressionType& m_expression;
};
#endif


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
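Putting TensorBase::device() and these partial specializations together, the same assignment syntax dispatches to the strategy matching the device type (a sketch reusing the tensors from the earlier example; the thread count and the gpu_* maps are assumptions modeled on the tests below):

    // Default: single cpu thread, internal::TensorAssign.
    c = a + b;

    // Thread pool (compile with EIGEN_USE_THREADS): internal::TensorAssignMultiThreaded.
    Eigen::ThreadPoolDevice thread_pool_device(4);
    c.device(thread_pool_device) = a + b;

    // GPU (compile with EIGEN_USE_GPU under nvcc): internal::TensorAssignGpu.
    Eigen::GpuDevice gpu_device;
    gpu_c.device(gpu_device) = gpu_a + gpu_b;  // gpu_* are TensorMaps over cudaMalloc'ed buffers

Note that m_device is held by const reference, so the device object must outlive the TensorDevice pseudo expression; the tests keep it as a member or a local for exactly that reason.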
unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h (new file, 56 lines)
@@ -0,0 +1,56 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H

namespace Eigen {

// Default device for the machine (typically a single cpu core)
struct DefaultDevice {
};


// Multiple cpu cores
// We should really use a thread pool here but first we need to find a portable thread pool library.
#ifdef EIGEN_USE_THREADS
struct ThreadPoolDevice {
  ThreadPoolDevice(/*ThreadPool* pool, */size_t num_cores) : /*pool_(pool), */num_threads_(num_cores) { }
  size_t numThreads() const { return num_threads_; }
  /*ThreadPool* threadPool() const { return pool_; }*/

 private:
  // todo: NUMA, ...
  size_t num_threads_;
  /*ThreadPool* pool_;*/
};
#endif


// GPU offloading
#ifdef EIGEN_USE_GPU
struct GpuDevice {
  // todo: support for multiple gpu;
  GpuDevice() {
    cudaStreamCreate(&stream_);
  }
  ~GpuDevice() {
    cudaStreamDestroy(stream_);
  }
  const cudaStream_t& stream() const { return stream_; }

 private:
  cudaStream_t stream_;
};
#endif

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_TYPE_H
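Both device structs are compile-time opt-ins: ThreadPoolDevice only exists under EIGEN_USE_THREADS and GpuDevice only under EIGEN_USE_GPU, so a translation unit defines the macro before including the Tensor module, as the new tests do. A minimal sketch (the core count is an assumption):

    #define EIGEN_USE_THREADS  // enables ThreadPoolDevice and pulls <future> into TensorAssign.h
    #include <Eigen/CXX11/Tensor>

    Eigen::ThreadPoolDevice device(4);
    // device.numThreads() == 4

One design point worth noting: GpuDevice creates a cudaStream_t in its constructor and destroys it in its destructor, so copying a GpuDevice would leave two owners of one stream; it is best treated as a single long-lived, non-copied object.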
unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h
@@ -79,16 +79,16 @@ struct Sizes : internal::numeric_list<std::size_t, Indices...> {

   Sizes() { }
   template <typename DenseIndex>
-  explicit Sizes(const array<DenseIndex, Base::count>&/* indices*/) {
+  explicit Sizes(const array<DenseIndex, Base::count>& /*indices*/) {
     // todo: add assertion
   }
 #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
-  explicit Sizes(std::initializer_list<std::size_t>/* l*/) {
+  explicit Sizes(std::initializer_list<std::size_t> /*l*/) {
     // todo: add assertion
   }
 #endif

-  template <typename T> Sizes& operator = (const T&/* other*/) {
+  template <typename T> Sizes& operator = (const T& /*other*/) {
     // add assertion failure if the size of other is different
     return *this;
   }
@@ -119,7 +119,7 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
   static const size_t count = Base::count;
   static const std::size_t total_size = internal::arg_prod<Base>::value;

-  static const size_t TotalSize() {
+  static size_t TotalSize() {
     return internal::arg_prod<Base>::value;
   }

@@ -181,14 +181,11 @@ template <typename DenseIndex, std::size_t NumDims>
 struct DSizes : array<DenseIndex, NumDims> {
   typedef array<DenseIndex, NumDims> Base;

-  size_t TotalSize() const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() const {
     return internal::array_prod(*static_cast<const Base*>(this));
   }

   DSizes() { }
 #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
   // explicit DSizes(std::initializer_list<DenseIndex> l) : Base(l) { }
 #endif
   explicit DSizes(const array<DenseIndex, NumDims>& a) : Base(a) { }

   DSizes& operator = (const array<DenseIndex, NumDims>& other) {
@@ -203,7 +200,6 @@ struct DSizes : array<DenseIndex, NumDims> {
   size_t IndexOfRowMajor(const array<DenseIndex, NumDims>& indices) const {
     return internal::tensor_index_linearization_helper<DenseIndex, NumDims, NumDims - 1, true>::run(indices, *static_cast<const Base*>(this));
   }
 };
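IndexOfRowMajor and IndexOfColMajor reduce a multi-index to a flat offset. For the dimensions (2, 3, 7) used throughout the new tests, the two layouts give

    row-major: index(i, j, k) = k + 7 * (j + 3 * i)
    col-major: index(i, j, k) = i + 2 * (j + 3 * k)

so the coefficient at (1, 2, 3) lives at offset 3 + 7 * (2 + 3) = 38 in row-major order and 1 + 2 * (2 + 9) = 23 in column-major order.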
unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -29,32 +29,38 @@ struct TensorEvaluator
 {
   typedef typename Derived::Index Index;
   typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Scalar& CoeffReturnType;
+  typedef typename Derived::Packet Packet;
+  typedef typename Derived::Scalar CoeffReturnType;
+  typedef typename Derived::Packet PacketReturnType;
+
+  enum {
+    IsAligned = Derived::IsAligned,
+    PacketAccess = Derived::PacketAccess,
+  };

   TensorEvaluator(Derived& m)
       : m_data(const_cast<Scalar*>(m.data()))
   { }

-  CoeffReturnType coeff(Index index) const {
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const {
     return m_data[index];
   }

-  Scalar& coeffRef(Index index) {
+  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index) {
     return m_data[index];
   }

-  // to do: vectorized evaluation.
-  /* template<int LoadMode>
+  template<int LoadMode>
   PacketReturnType packet(Index index) const
   {
-    return ploadt<PacketScalar, LoadMode>(m_data + index);
+    return internal::ploadt<Packet, LoadMode>(m_data + index);
   }

-  template<int StoreMode>
-  void writePacket(Index index, const PacketScalar& x)
+  template <int StoreMode>
+  void writePacket(Index index, const Packet& x)
   {
-    return pstoret<Scalar, PacketScalar, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
-  }*/
+    return internal::pstoret<Scalar, Packet, StoreMode>(m_data + index, x);
+  }

 protected:
   Scalar* m_data;
@@ -70,6 +76,11 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType> >
 {
   typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

+  enum {
+    IsAligned = TensorEvaluator<ArgType>::IsAligned,
+    PacketAccess = TensorEvaluator<ArgType>::PacketAccess & internal::functor_traits<UnaryOp>::PacketAccess,
+  };
+
   TensorEvaluator(const XprType& op)
     : m_functor(op.functor()),
       m_argImpl(op.nestedExpression())
@@ -77,12 +88,19 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType> >

   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketReturnType PacketReturnType;

-  CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
   {
     return m_functor(m_argImpl.coeff(index));
   }

+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
+  {
+    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
+  }
+
 private:
   const UnaryOp m_functor;
   TensorEvaluator<ArgType> m_argImpl;
@@ -96,6 +114,12 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> >
 {
   typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

+  enum {
+    IsAligned = TensorEvaluator<LeftArgType>::IsAligned & TensorEvaluator<RightArgType>::IsAligned,
+    PacketAccess = TensorEvaluator<LeftArgType>::PacketAccess & TensorEvaluator<RightArgType>::PacketAccess &
+                   internal::functor_traits<BinaryOp>::PacketAccess,
+  };
+
   TensorEvaluator(const XprType& op)
     : m_functor(op.functor()),
       m_leftImpl(op.lhsExpression()),
@@ -104,11 +128,17 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> >

   typedef typename XprType::Index Index;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketReturnType PacketReturnType;

-  CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
   {
     return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
   }
+  template<int LoadMode>
+  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
+  {
+    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
+  }

 private:
   const BinaryOp m_functor;
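These evaluators compose the same way Eigen's expression templates do on matrices: every node of an expression tree gets an evaluator, and coeff()/packet() calls recurse to the leaves. For c = a + b the evaluation is, schematically:

    // TensorEvaluator<const TensorCwiseBinaryOp<scalar_sum_op<float>, A, B> >
    //   .coeff(i)  == sum_functor(a_eval.coeff(i), b_eval.coeff(i))
    //              == a.data()[i] + b.data()[i]
    //   .packet<LoadMode>(i) has the same shape: SIMD loads at the leaf
    //   evaluators, packetOp() at the interior node, one writePacket at the root.

PacketAccess propagates bottom-up through the enums above: an expression is vectorizable only if every leaf supports packet access and every functor in the tree provides a packetOp().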
unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
@@ -33,6 +33,9 @@ struct traits<TensorCwiseUnaryOp<UnaryOp, XprType> >
   typedef typename result_of<
                      UnaryOp(typename XprType::Scalar)
                    >::type Scalar;
+  typedef typename result_of<
+                     UnaryOp(typename XprType::Packet)
+                   >::type Packet;
   typedef typename XprType::Nested XprTypeNested;
   typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
 };
@@ -57,14 +60,16 @@ template<typename UnaryOp, typename XprType>
 class TensorCwiseUnaryOp : public TensorBase<TensorCwiseUnaryOp<UnaryOp, XprType> >
 {
   public:
     typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Scalar Scalar;
+    typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Packet Packet;
     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
     typedef typename XprType::CoeffReturnType CoeffReturnType;
+    typedef typename XprType::PacketReturnType PacketReturnType;
     typedef typename Eigen::internal::nested<TensorCwiseUnaryOp>::type Nested;
     typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::StorageKind StorageKind;
     typedef typename Eigen::internal::traits<TensorCwiseUnaryOp>::Index Index;

-    inline TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
       : m_xpr(xpr), m_functor(func) {}

     EIGEN_DEVICE_FUNC
@@ -92,6 +97,7 @@ struct traits<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
                        typename RhsXprType::Scalar
                      )
                    >::type Scalar;
+  typedef typename internal::packet_traits<Scalar>::type Packet;
   typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
                                         typename traits<RhsXprType>::StorageKind>::ret StorageKind;
   typedef typename promote_index_type<typename traits<LhsXprType>::Index,
@@ -123,14 +129,17 @@ class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsXprType, RhsXprType> >
 {
   public:
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Scalar Scalar;
+    typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Packet Packet;
     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
     typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
                                                     typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
+    typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType,
+                                                    typename RhsXprType::PacketReturnType>::ret PacketReturnType;
     typedef typename Eigen::internal::nested<TensorCwiseBinaryOp>::type Nested;
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::StorageKind StorageKind;
     typedef typename Eigen::internal::traits<TensorCwiseBinaryOp>::Index Index;

-    inline TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp())
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp())
       : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {}

     EIGEN_DEVICE_FUNC
unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h
@@ -33,11 +33,17 @@ class TensorFixedSize : public TensorBase<TensorFixedSize<Scalar_, Dimensions_, Options_> >
     typedef typename internal::traits<Self>::StorageKind StorageKind;
     typedef typename internal::traits<Self>::Index Index;
     typedef Scalar_ Scalar;
     typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;

     static const int Options = Options_;

+    enum {
+      IsAligned = bool(EIGEN_ALIGN),
+      PacketAccess = true,
+    };
+
     typedef Dimensions_ Dimensions;
     static const std::size_t NumIndices = Dimensions::count;
unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -14,12 +14,14 @@ namespace Eigen {

 template<typename Scalar_, std::size_t NumIndices_, int Options_ = 0> class Tensor;
 template<typename Scalar_, typename Dimensions, int Options_ = 0> class TensorFixedSize;
-template<typename PlainObjectType> class TensorMap;
+template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap;
 template<typename Derived> class TensorBase;

 template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
 template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp;

+template<typename ExpressionType, typename DeviceType> class TensorDevice;
+
 // Move to internal?
 template<typename Derived> struct TensorEvaluator;
unsupported/Eigen/CXX11/src/Tensor/TensorMap.h
@@ -22,16 +22,16 @@ template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
  *
  */

-template<typename PlainObjectType> class TensorMap : public TensorBase<TensorMap<PlainObjectType> >
+template<typename PlainObjectType, int Options_> class TensorMap : public TensorBase<TensorMap<PlainObjectType, Options_> >
 {
   public:
-    typedef TensorMap<PlainObjectType> Self;
+    typedef TensorMap<PlainObjectType, Options_> Self;
     typedef typename PlainObjectType::Base Base;
     typedef typename Eigen::internal::nested<Self>::type Nested;
     typedef typename internal::traits<PlainObjectType>::StorageKind StorageKind;
     typedef typename internal::traits<PlainObjectType>::Index Index;
     typedef typename internal::traits<PlainObjectType>::Scalar Scalar;
     typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    typedef typename internal::packet_traits<Scalar>::type Packet;
     typedef typename NumTraits<Scalar>::Real RealScalar;
     typedef typename Base::CoeffReturnType CoeffReturnType;

@@ -43,13 +43,12 @@
     typedef Scalar* PointerType;
     typedef PointerType PointerArgType;

-    // Fixed size plain object type only
-    /* EIGEN_DEVICE_FUNC
-    EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr) {
-      // The number of dimensions used to construct a tensor must be equal to the rank of the tensor.
-      //EIGEN_STATIC_ASSERT(1 == PlainObjectType::NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE)
-      // todo: add assert to ensure we don't screw up here.
-    }*/
+    static const int Options = Options_;
+
+    enum {
+      IsAligned = bool(EIGEN_ALIGN) && ((int(Options_)&Aligned)==Aligned),
+      PacketAccess = true,
+    };

     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(array<DenseIndex, PlainObjectType::NumIndices>({{firstDimension}})) {
@@ -65,7 +64,7 @@
     }
 #endif

     inline TensorMap(PointerArgType dataPtr, const array<Index, PlainObjectType::NumIndices>& dimensions)
         : m_data(dataPtr), m_dimensions(dimensions)
     { }

@@ -80,12 +79,97 @@
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; }

+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(const array<Index, PlainObjectType::NumIndices>& indices) const
+    {
+      // eigen_assert(checkIndexRange(indices));
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(indices);
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(indices);
+        return m_data[index];
+      }
+    }
+
+#ifdef EIGEN_HAS_VARIADIC_TEMPLATES
+    template<typename... IndexTypes> EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const
+    {
+      static_assert(sizeof...(otherIndices) + 1 == PlainObjectType::NumIndices, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor.");
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(array<Index, PlainObjectType::NumIndices>{{firstIndex, otherIndices...}});
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(array<Index, PlainObjectType::NumIndices>{{firstIndex, otherIndices...}});
+        return m_data[index];
+      }
+    }
+#else
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const
+    {
+      eigen_internal_assert(index >= 0 && index < size());
+      return m_data[index];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i1 + i0 * m_dimensions[1];
+        return m_data[index];
+      } else {
+        const Index index = i0 + i1 * m_dimensions[0];
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+         const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
+         return m_data[index];
+      } else {
+         const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
+         return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
+        return m_data[index];
+      }
+    }
+#endif
+
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(const array<Index, PlainObjectType::NumIndices>& indices)
+    {
+      // eigen_assert(checkIndexRange(indices));
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = m_dimensions.IndexOfRowMajor(indices);
+        return m_data[index];
+      } else {
+        const Index index = m_dimensions.IndexOfColMajor(indices);
+        return m_data[index];
+      }
+    }
+
 #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
     template<typename... IndexTypes> EIGEN_DEVICE_FUNC
@@ -100,8 +184,60 @@
         return m_data[index];
       }
     }
 #else
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index index)
+    {
+      eigen_internal_assert(index >= 0 && index < size());
+      return m_data[index];
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i1 + i0 * m_dimensions[1];
+        return m_data[index];
+      } else {
+        const Index index = i0 + i1 * m_dimensions[0];
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+         const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0);
+         return m_data[index];
+      } else {
+         const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2);
+         return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3));
+        return m_data[index];
+      }
+    }
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4)
+    {
+      if (PlainObjectType::Options&RowMajor) {
+        const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)));
+        return m_data[index];
+      } else {
+        const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4)));
+        return m_data[index];
+      }
+    }
+#endif
+
+
     template<typename OtherDerived>
     EIGEN_DEVICE_FUNC
     Self& operator=(const OtherDerived& other)
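A TensorMap wraps a caller-owned buffer without copying it, which is what lets the GPU path hand raw device pointers to the evaluators. A usage sketch with the tests' 2x3x7 shape:

    float data[2 * 3 * 7];
    Eigen::TensorMap<Eigen::Tensor<float, 3> > t(data, Eigen::array<int, 3>(2, 3, 7));
    t(Eigen::array<int, 3>(1, 2, 3)) = 42.0f;  // writes data[23] in the default column-major layout

The Options_ parameter added here also lets a caller promise alignment (TensorMap<T, Aligned>), which flips IsAligned and in turn selects aligned packet loads and stores in TensorAssign.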
unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
@@ -72,9 +72,6 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, void>
     TensorStorage() { }
     TensorStorage(const TensorStorage<T, NumIndices_, Dynamic, Options_, void>& other) : Base_(other) { }

-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    // TensorStorage(TensorStorage<T, NumIndices_, Dynamic, Options_, void>&&) = default;
-#endif
     TensorStorage(internal::constructor_without_unaligned_array_assert) : Base_(internal::constructor_without_unaligned_array_assert()) {}
     TensorStorage(DenseIndex size, const array<DenseIndex, NumIndices_>& dimensions) : Base_(size, dimensions) {}

@@ -111,22 +108,6 @@ class TensorStorage<T, NumIndices_, Dynamic, Options_, typename internal::gen_nu
       return *this;
     }

-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    /* TensorStorage(Self_&& other)
-      : m_data(std::move(other.m_data)), m_dimensions(std::move(other.m_dimensions))
-    {
-      other.m_data = nullptr;
-    }
-
-    Self_& operator=(Self_&& other)
-    {
-      using std::swap;
-      swap(m_data, other.m_data);
-      swap(m_dimensions, other.m_dimensions);
-      return *this;
-    }*/
-#endif
-
     ~TensorStorage() { internal::conditional_aligned_delete_auto<T,(Options_&DontAlign)==0>(m_data, internal::array_prod(m_dimensions)); }
     void swap(Self_& other)
     { std::swap(m_data,other.m_data); std::swap(m_dimensions,other.m_dimensions); }
unsupported/test/CMakeLists.txt
@@ -104,4 +104,7 @@ if(EIGEN_TEST_CXX11)
   ei_add_test(cxx11_tensor_assign "-std=c++0x")
   ei_add_test(cxx11_tensor_expr "-std=c++0x")
   ei_add_test(cxx11_tensor_map "-std=c++0x")
+  ei_add_test(cxx11_tensor_device "-std=c++0x")
+#  ei_add_test(cxx11_tensor_fixed_size "-std=c++0x")
+  ei_add_test(cxx11_tensor_thread_pool "-std=c++0x")
 endif()
unsupported/test/cxx11_tensor_device.cpp (new file, 126 lines)
@@ -0,0 +1,126 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_device
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU


#include "main.h"
#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;
using Eigen::RowMajor;

// Context for evaluation on cpu
struct CPUContext {
  CPUContext(const Eigen::Tensor<float, 3>& in1, Eigen::Tensor<float, 3>& in2, Eigen::Tensor<float, 3>& out) : in1_(in1), in2_(in2), out_(out) { }

  const Eigen::Tensor<float, 3>& in1() const { return in1_; }
  const Eigen::Tensor<float, 3>& in2() const { return in2_; }
  Eigen::TensorDevice<Eigen::Tensor<float, 3>, Eigen::DefaultDevice> out() { return TensorDevice<Eigen::Tensor<float, 3>, Eigen::DefaultDevice>(cpu_device_, out_); }

 private:
  const Eigen::Tensor<float, 3>& in1_;
  const Eigen::Tensor<float, 3>& in2_;
  Eigen::Tensor<float, 3>& out_;

  Eigen::DefaultDevice cpu_device_;
};


// Context for evaluation on GPU
struct GPUContext {
  GPUContext(const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1, Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2, Eigen::TensorMap<Eigen::Tensor<float, 3> >& out) : in1_(in1), in2_(in2), out_(out) { }

  const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1() const { return in1_; }
  const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2() const { return in2_; }
  Eigen::TensorDevice<Eigen::TensorMap<Eigen::Tensor<float, 3> >, Eigen::GpuDevice> out() { return TensorDevice<Eigen::TensorMap<Eigen::Tensor<float, 3> >, Eigen::GpuDevice>(gpu_device_, out_); }

 private:
  const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1_;
  const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2_;
  Eigen::TensorMap<Eigen::Tensor<float, 3> >& out_;
  Eigen::GpuDevice gpu_device_;
};


// The actual expression to evaluate
template <typename Context>
static void test_contextual_eval(Context* context)
{
  context->out() = context->in1() + context->in2() * 3.14f;
}

static void test_cpu() {
  Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(2,3,7));
  Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(2,3,7));
  Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(2,3,7));

  in1.setRandom();
  in2.setRandom();
  CPUContext context(in1, in2, out);
  test_contextual_eval(&context);

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f);
      }
    }
  }
}

static void test_gpu() {
  Eigen::Tensor<float, 3> in1(Eigen::array<int, 3>(2,3,7));
  Eigen::Tensor<float, 3> in2(Eigen::array<int, 3>(2,3,7));
  Eigen::Tensor<float, 3> out(Eigen::array<int, 3>(2,3,7));
  in1.setRandom();
  in2.setRandom();

  std::size_t in1_bytes = in1.size() * sizeof(float);
  std::size_t in2_bytes = in2.size() * sizeof(float);
  std::size_t out_bytes = out.size() * sizeof(float);

  float* d_in1;
  float* d_in2;
  float* d_out;
  cudaMalloc((void**)(&d_in1), in1_bytes);
  cudaMalloc((void**)(&d_in2), in2_bytes);
  cudaMalloc((void**)(&d_out), out_bytes);

  cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice);
  cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice);

  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(2,3,7));
  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(2,3,7));
  Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(2,3,7));

  GPUContext context(gpu_in1, gpu_in2, gpu_out);
  test_contextual_eval(&context);

  cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost);
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * 3.14f);
      }
    }
  }
}


void test_cxx11_tensor_device()
{
  CALL_SUBTEST(test_cpu());
  CALL_SUBTEST(test_gpu());
}
unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -159,9 +159,37 @@ static void test_3d()
 }


+static void test_array()
+{
+  TensorFixedSize<float, Sizes<2, 3, 7> > mat1;
+  float val = 0.0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        mat1(array<ptrdiff_t, 3>(i,j,k)) = val;
+        val += 1.0;
+      }
+    }
+  }
+
+  TensorFixedSize<float, Sizes<2, 3, 7> > mat3;
+  mat3 = mat1.cwisePow(3.5f);
+
+  val = 0.0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        VERIFY_IS_APPROX(mat3(array<ptrdiff_t, 3>(i,j,k)), powf(val, 3.5f));
+        val += 1.0;
+      }
+    }
+  }
+}
+
 void test_cxx11_tensor_fixed_size()
 {
   CALL_SUBTEST(test_1d());
   CALL_SUBTEST(test_2d());
   CALL_SUBTEST(test_3d());
+  CALL_SUBTEST(test_array());
 }
unsupported/test/cxx11_tensor_thread_pool.cpp (new file, 37 lines)
@@ -0,0 +1,37 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_USE_THREADS


#include "main.h"
#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;

void test_cxx11_tensor_thread_pool()
{
  Eigen::Tensor<float, 3> in1(Eigen::array<ptrdiff_t, 3>(2,3,7));
  Eigen::Tensor<float, 3> in2(Eigen::array<ptrdiff_t, 3>(2,3,7));
  Eigen::Tensor<float, 3> out(Eigen::array<ptrdiff_t, 3>(2,3,7));

  in1.setRandom();
  in2.setRandom();

  Eigen::ThreadPoolDevice thread_pool_device(3);
  out.device(thread_pool_device) = in1 + in2 * 3.14f;

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(out(Eigen::array<ptrdiff_t, 3>(i,j,k)), in1(Eigen::array<ptrdiff_t, 3>(i,j,k)) + in2(Eigen::array<ptrdiff_t, 3>(i,j,k)) * 3.14f);
      }
    }
  }
}