diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index e9aa22183..7b8d34321 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -16,50 +16,157 @@ namespace internal {
 
 // Standard reduction functors
 template <typename T> struct SumReducer
 {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SumReducer() : m_sum(0) { }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
-    m_sum += t;
+  static const bool PacketAccess = true;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
+    (*accum) += t;
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
-    return m_sum;
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
+    (*accum) = padd<Packet>(*accum, p);
   }
 
-  private:
-    typename internal::remove_all<T>::type m_sum;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
+    return static_cast<T>(0);
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
+    return pset1<Packet>(0);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
+    return accum;
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizePacket(const T saccum, const Packet& vaccum) const {
+    return saccum + predux(vaccum);
+  }
+};
+
+template <typename T> struct MeanReducer
+{
+  static const bool PacketAccess = true;
+  MeanReducer() : count_(0) { }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
+    (*accum) += t;
+    count_++;
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) {
+    (*accum) = padd<Packet>(*accum, p);
+    count_ += packet_traits<T>::size;
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
+    return static_cast<T>(0);
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
+    return pset1<Packet>(0);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
+    return accum / count_;
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizePacket(const T saccum, const Packet& vaccum) const {
+    return (saccum + predux(vaccum)) / count_;
+  }
+
+  protected:
+    int count_;
 };
 
 template <typename T> struct MaxReducer
 {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaxReducer() : m_max(-(std::numeric_limits<T>::max)()) { }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
-    if (t > m_max) { m_max = t; }
+  static const bool PacketAccess = true;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
+    if (t > *accum) { *accum = t; }
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
-    return m_max;
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
+    (*accum) = pmax<Packet>(*accum, p);
   }
 
-  private:
-    typename internal::remove_all<T>::type m_max;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
+    return -(std::numeric_limits<T>::max)();
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
+    return pset1<Packet>(-(std::numeric_limits<T>::max)());
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
+    return accum;
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizePacket(const T saccum, const Packet& vaccum) const {
+    return (std::max)(saccum, predux_max(vaccum));
+  }
 };
 
 template <typename T> struct MinReducer
 {
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MinReducer() : m_min((std::numeric_limits<T>::max)()) { }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
-    if (t < m_min) { m_min = t; }
+  static const bool PacketAccess = true;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
+    if (t < *accum) { *accum = t; }
   }
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
-    return m_min;
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
+    (*accum) = pmin<Packet>(*accum, p);
   }
 
-  private:
-    typename internal::remove_all<T>::type m_min;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
+    return (std::numeric_limits<T>::max)();
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
+    return pset1<Packet>((std::numeric_limits<T>::max)());
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
+    return accum;
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizePacket(const T saccum, const Packet& vaccum) const {
+    return (std::min)(saccum, predux_min(vaccum));
+  }
 };
 
+template <typename T> struct ProdReducer
+{
+  static const bool PacketAccess = true;
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
+    (*accum) *= t;
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
+    (*accum) = pmul<Packet>(*accum, p);
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
+    return static_cast<T>(1);
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
+    return pset1<Packet>(1);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
+    return accum;
+  }
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizePacket(const T saccum, const Packet& vaccum) const {
+    return saccum * predux_mul(vaccum);
+  }
+};
+
 
 #if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
 // We're not compiling a cuda kernel
 template <typename T> struct UniformRandomGenerator {
+
+  static const bool PacketAccess = true;
+
   template<typename Index>
   T operator()(Index, Index = 0) const {
     return random<T>();
@@ -81,16 +188,19 @@ template <typename T> struct UniformRandomGenerator {
 template <typename T> struct UniformRandomGenerator;
 
 template <> struct UniformRandomGenerator<float> {
-  UniformRandomGenerator() {
+
+  static const bool PacketAccess = true;
+
+  EIGEN_DEVICE_FUNC UniformRandomGenerator() {
     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
     curand_init(0, tid, 0, &m_state);
   }
 
-  template<typename Index>
+  template<typename Index> EIGEN_DEVICE_FUNC
   float operator()(Index, Index = 0) const {
     return curand_uniform(&m_state);
   }
-  template<typename Index>
+  template<typename Index> EIGEN_DEVICE_FUNC
   float4 packetOp(Index, Index = 0) const {
     return curand_uniform4(&m_state);
   }
@@ -100,15 +210,18 @@ template <> struct UniformRandomGenerator<float> {
 };
 
 template <> struct UniformRandomGenerator<double> {
-  UniformRandomGenerator() {
+
+  static const bool PacketAccess = true;
+
+  EIGEN_DEVICE_FUNC UniformRandomGenerator() {
     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
     curand_init(0, tid, 0, &m_state);
   }
-  template<typename Index>
+  template<typename Index> EIGEN_DEVICE_FUNC
   double operator()(Index, Index = 0) const {
     return curand_uniform_double(&m_state);
   }
-  template<typename Index>
+  template<typename Index> EIGEN_DEVICE_FUNC
   double2 packetOp(Index, Index = 0) const {
     return curand_uniform2_double(&m_state);
   }
@@ -120,6 +233,84 @@ template <> struct UniformRandomGenerator<double> {
 
 #endif
 
+
+#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711
+// We're not compiling a cuda kernel
+template <typename T> struct NormalRandomGenerator {
+
+  static const bool PacketAccess = true;
+
+  NormalRandomGenerator() : m_distribution(0, 1) {}
+  NormalRandomGenerator(const NormalRandomGenerator& other) : m_distribution(other.m_distribution) { }
+
+  template<typename Index>
+  T operator()(Index, Index = 0) const {
+    return m_distribution(m_generator);
+  }
+  template<typename Index>
+  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
+    const int packetSize = internal::packet_traits<T>::size;
+    EIGEN_ALIGN_DEFAULT T values[packetSize];
+    for (int i = 0; i < packetSize; ++i) {
+      values[i] = m_distribution(m_generator);
+    }
+    return internal::pload<typename internal::packet_traits<T>::type>(values);
+  }
+
+  mutable std::normal_distribution<T> m_distribution;
+  mutable std::default_random_engine m_generator;
+};
+
+#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)
+
+// We're compiling a cuda kernel
+template <typename T> struct NormalRandomGenerator;
+
+template <> struct NormalRandomGenerator<float> {
+
+  static const bool PacketAccess = true;
+
+  EIGEN_DEVICE_FUNC NormalRandomGenerator() {
+    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    curand_init(0, tid, 0, &m_state);
+  }
+
+  template<typename Index> EIGEN_DEVICE_FUNC
+  float operator()(Index, Index = 0) const {
+    return curand_normal(&m_state);
+  }
+  template<typename Index> EIGEN_DEVICE_FUNC
+  float4 packetOp(Index, Index = 0) const {
+    return curand_normal4(&m_state);
+  }
+
+ private:
+  mutable curandStatePhilox4_32_10_t m_state;
+};
+
+template <> struct NormalRandomGenerator<double> {
+
+  static const bool PacketAccess = true;
+
+  EIGEN_DEVICE_FUNC NormalRandomGenerator() {
+    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+    curand_init(0, tid, 0, &m_state);
+  }
+  template<typename Index> EIGEN_DEVICE_FUNC
+  double operator()(Index, Index = 0) const {
+    return curand_normal_double(&m_state);
+  }
+  template<typename Index> EIGEN_DEVICE_FUNC
+  double2 packetOp(Index, Index = 0) const {
+    return curand_normal2_double(&m_state);
+  }
+
+ private:
+  mutable curandStatePhilox4_32_10_t m_state;
+};
+
+#endif
+
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index cbe87394b..eebcc4850 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -43,6 +43,75 @@ struct nested<TensorReductionOp<Op, Dims, XprType>, 1, typename eval<TensorReductionOp<Op, Dims, XprType> >::type>
   typedef TensorReductionOp<Op, Dims, XprType> type;
 };
 
+
+template <typename ReducedDims, int NumTensorDims, int Layout>
+struct are_inner_most_dims {
+  static const bool value = false;
+};
+#if __cplusplus > 199711L
+template <typename ReducedDims, int NumTensorDims>
+struct are_inner_most_dims<ReducedDims, NumTensorDims, ColMajor>{
+  static const bool value = indices_statically_known_to_increase<ReducedDims>()() &&
+                            index_statically_eq<ReducedDims>()(0, 0) &&
+                            index_statically_eq<ReducedDims>()(array_size<ReducedDims>::value-1, array_size<ReducedDims>::value-1);
+};
+template <typename ReducedDims, int NumTensorDims>
+struct are_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
+  static const bool value = indices_statically_known_to_increase<ReducedDims>()() &&
+                            index_statically_eq<ReducedDims>()(0, NumTensorDims - array_size<ReducedDims>::value) &&
+                            index_statically_eq<ReducedDims>()(array_size<ReducedDims>::value - 1, NumTensorDims - 1);
+};
+#endif
+
+
+template <int DimIndex, typename Self, typename Op>
+struct GenericDimReducer {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) {
+    EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
+      const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
+      GenericDimReducer<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
+    }
+  }
+};
+template <typename Self, typename Op>
+struct GenericDimReducer<0, Self, Op> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) {
+    for (int j = 0; j < self.m_reducedDims[0]; ++j) {
+      const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0];
+      reducer.reduce(self.m_impl.coeff(input), accum);
+    }
+  }
+};
+
+template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess && Op::PacketAccess)>
+struct InnerMostDimReducer {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
+    typename Self::CoeffReturnType accum = reducer.initialize();
+    for (typename Self::Index j = 0; j < numValuesToReduce; ++j) {
+      reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
+    }
+    return reducer.finalize(accum);
+  }
+};
+
+template <typename Self, typename Op>
+struct InnerMostDimReducer<Self, Op, true> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
+    const int packetSize = internal::unpacket_traits<typename Self::PacketReturnType>::size;
+    const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize;
+    typename Self::PacketReturnType p = reducer.template initializePacket<typename Self::PacketReturnType>();
+    for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) {
+      reducer.reducePacket(self.m_impl.template packet<Unaligned>(firstIndex + j), &p);
+    }
+    typename Self::CoeffReturnType accum = reducer.initialize();
+    for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) {
+      reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum);
+    }
+    return reducer.finalizePacket(accum, p);
+  }
+};
+
 } // end namespace internal
 
 
@@ -52,8 +121,8 @@ class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>, ReadOnlyAccessors>
     typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar;
     typedef typename Eigen::internal::traits<TensorReductionOp>::Packet Packet;
     typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-    typedef typename XprType::CoeffReturnType CoeffReturnType;
-    typedef typename XprType::PacketReturnType PacketReturnType;
+    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
+    typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;
     typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested;
     typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind;
     typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index;
@@ -85,20 +154,27 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
   typedef typename XprType::Index Index;
   static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
   static const int NumReducedDims = internal::array_size<Dims>::value;
-  static const int NumDims = (NumInputDims==NumReducedDims) ? 1 : NumInputDims - NumReducedDims;
-  typedef DSizes<Index, NumDims> Dimensions;
+  static const int NumOutputDims = (NumInputDims==NumReducedDims) ? 1 : NumInputDims - NumReducedDims;
+  typedef DSizes<Index, NumOutputDims> Dimensions;
   typedef typename XprType::Scalar Scalar;
+  typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> Self;
+  static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
 
   enum {
     IsAligned = false,
-    PacketAccess = false,  // The code isn't vectorized properly yet
+    PacketAccess = Self::InputPacketAccess && Op::PacketAccess,
+    Layout = TensorEvaluator<ArgType, Device>::Layout,
+    CoordAccess = false,  // to be implemented
   };
 
+  static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
       : m_impl(op.expression(), device), m_reducer(op.reducer())
   {
     EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    // Bitmap indicating if an input dimension is reduced or not.
     array<bool, NumInputDims> reduced;
     for (int i = 0; i < NumInputDims; ++i) {
       reduced[i] = false;
@@ -122,24 +198,41 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
       }
     }
 
-    m_outputStrides[0] = 1;
-    for (int i = 1; i < NumDims; ++i) {
-      m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
+    // Precompute output strides.
+    if (Layout == ColMajor) {
+      m_outputStrides[0] = 1;
+      for (int i = 1; i < NumOutputDims; ++i) {
+        m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
+      }
+    } else {
+      m_outputStrides[NumOutputDims - 1] = 1;
+      for (int i = NumOutputDims - 2; i >= 0; --i) {
+        m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
+      }
     }
 
-    array<Index, NumInputDims> strides;
-    strides[0] = 1;
-    for (int i = 1; i < NumInputDims; ++i) {
-      strides[i] = strides[i-1] * input_dims[i-1];
+    // Precompute input strides.
+    array<Index, NumInputDims> input_strides;
+    if (Layout == ColMajor) {
+      input_strides[0] = 1;
+      for (int i = 1; i < NumInputDims; ++i) {
+        input_strides[i] = input_strides[i-1] * input_dims[i-1];
+      }
+    } else {
+      input_strides[NumInputDims - 1] = 1;
+      for (int i = NumInputDims - 2; i >= 0; --i) {
+        input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
+      }
     }
+
     outputIndex = 0;
     reduceIndex = 0;
     for (int i = 0; i < NumInputDims; ++i) {
       if (reduced[i]) {
-        m_reducedStrides[reduceIndex] = strides[i];
+        m_reducedStrides[reduceIndex] = input_strides[i];
         ++reduceIndex;
       } else {
-        m_preservedStrides[outputIndex] = strides[i];
+        m_preservedStrides[outputIndex] = input_strides[i];
         ++outputIndex;
       }
     }
@@ -147,6 +240,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
 
     // Special case for full reductions
     if (NumInputDims == NumReducedDims) {
       m_dimensions[0] = 1;
+      m_preservedStrides[0] = internal::array_prod(input_dims);
     }
   }
@@ -161,14 +255,22 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
     m_impl.cleanup();
   }
 
-  typedef typename XprType::CoeffReturnType CoeffReturnType;
-  typedef typename XprType::PacketReturnType PacketReturnType;
+  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
+  typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
   {
     Op reducer(m_reducer);
-    reduce(firstInput(index), 0, reducer);
-    return reducer.finalize();
+    if (ReducingInnerMostDims) {
+      const Index num_values_to_reduce =
+        (Layout == ColMajor) ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1];
+      return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index),
+                                                             num_values_to_reduce, reducer);
+    } else {
+      typename Self::CoeffReturnType accum = reducer.initialize();
+      internal::GenericDimReducer<NumReducedDims-1, Self, Op>::reduce(*this, firstInput(index), reducer, &accum);
+      return reducer.finalize(accum);
+    }
   }
 
   // TODO(bsteiner): provide a more efficient implementation.
@@ -179,9 +281,20 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
     EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
     eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
 
-    EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
-    for (int i = 0; i < packetSize; ++i) {
-      values[i] = coeff(index+i);
+    EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type values[packetSize];
+    if (ReducingInnerMostDims) {
+      const Index num_values_to_reduce =
+        (Layout == ColMajor) ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1];
+      const Index firstIndex = firstInput(index);
+      for (Index i = 0; i < packetSize; ++i) {
+        Op reducer(m_reducer);
+        values[i] = internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstIndex + i * num_values_to_reduce,
+                                                                    num_values_to_reduce, reducer);
+      }
+    } else {
+      for (int i = 0; i < packetSize; ++i) {
+        values[i] = coeff(index + i);
+      }
     }
     PacketReturnType rslt = internal::pload<PacketReturnType>(values);
     return rslt;
@@ -190,34 +303,59 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
   Scalar* data() const { return NULL; }
 
  private:
+  template <int, typename, typename> friend struct internal::GenericDimReducer;
+  template <typename, typename, bool> friend struct internal::InnerMostDimReducer;
+
+  // Returns the Index in the input tensor of the first value that needs to be
+  // used to compute the reduction at output index "index".
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
-    Index startInput = 0;
-    for (int i = NumDims - 1; i > 0; --i) {
-      const Index idx = index / m_outputStrides[i];
-      startInput += idx * m_preservedStrides[i];
-      index -= idx * m_outputStrides[i];
+    if (ReducingInnerMostDims) {
+      if (Layout == ColMajor) {
+        return index * m_preservedStrides[0];
+      } else {
+        return index * m_preservedStrides[NumOutputDims - 1];
+      }
+    }
+    Index startInput = 0;
+    if (Layout == ColMajor) {
+      for (int i = NumOutputDims - 1; i > 0; --i) {
+        // This is index_i in the output tensor.
+        const Index idx = index / m_outputStrides[i];
+        startInput += idx * m_preservedStrides[i];
+        index -= idx * m_outputStrides[i];
+      }
+      startInput += index * m_preservedStrides[0];
+    } else {
+      for (int i = 0; i < NumOutputDims - 1; ++i) {
+        // This is index_i in the output tensor.
+        const Index idx = index / m_outputStrides[i];
+        startInput += idx * m_preservedStrides[i];
+        index -= idx * m_outputStrides[i];
+      }
+      startInput += index * m_preservedStrides[NumOutputDims - 1];
     }
-    startInput += index * m_preservedStrides[0];
     return startInput;
   }
 
-  EIGEN_DEVICE_FUNC void reduce(Index firstIndex, int DimIndex, Op& reducer) const {
-    for (int j = 0; j < m_reducedDims[DimIndex]; ++j) {
-      const Index input = firstIndex + j * m_reducedStrides[DimIndex];
-      if (DimIndex < NumReducedDims-1) {
-        reduce(input, DimIndex+1, reducer);
-      } else {
-        reducer.reduce(m_impl.coeff(input));
-      }
-    }
-  }
-
+  // Dimensions of the output of the operation.
   Dimensions m_dimensions;
-  array<Index, NumDims> m_outputStrides;
-  array<Index, NumDims> m_preservedStrides;
+  // Precomputed strides for the output tensor.
+  array<Index, NumOutputDims> m_outputStrides;
+  // Subset of strides of the input tensor for the non-reduced dimensions.
+  // Indexed by output dimensions.
+  array<Index, NumOutputDims> m_preservedStrides;
+
+  // Subset of strides of the input tensor for the reduced dimensions.
+  // Indexed by reduced dimensions.
   array<Index, NumReducedDims> m_reducedStrides;
+  // Size of the input dimensions that are reduced.
+  // Indexed by reduced dimensions.
   array<Index, NumReducedDims> m_reducedDims;
+
+  // Evaluator for the input expression.
   TensorEvaluator<ArgType, Device> m_impl;
+
+  // Operation to apply for computing the reduction.
   Op m_reducer;
 };
diff --git a/unsupported/test/cxx11_tensor_random.cpp b/unsupported/test/cxx11_tensor_random.cpp
new file mode 100644
index 000000000..8276ae822
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_random.cpp
@@ -0,0 +1,78 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+static void test_default()
+{
+  Tensor<float, 1> vec(6);
+  vec.setRandom();
+
+  // Fixme: we should check that the generated numbers follow a uniform
+  // distribution instead.
+  for (int i = 1; i < 6; ++i) {
+    VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
+  }
+}
+
+static void test_normal()
+{
+  Tensor<float, 1> vec(6);
+  vec.setRandom<Eigen::internal::NormalRandomGenerator<float> >();
+
+  // Fixme: we should check that the generated numbers follow a gaussian
+  // distribution instead.
+  for (int i = 1; i < 6; ++i) {
+    VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1));
+  }
+}
+
+
+struct MyGenerator {
+  MyGenerator() { }
+  MyGenerator(const MyGenerator&) { }
+
+  // Return a random value to be used. "element_location" is the
+  // location of the entry to set in the tensor, it can typically
+  // be ignored.
+  int operator()(Eigen::DenseIndex element_location, Eigen::DenseIndex /*unused*/ = 0) const {
+    return 3 * element_location;
+  }
+
+  // Same as above but generates several numbers at a time.
+  typename internal::packet_traits<int>::type packetOp(
+      Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const {
+    const int packetSize = internal::packet_traits<int>::size;
+    EIGEN_ALIGN_DEFAULT int values[packetSize];
+    for (int i = 0; i < packetSize; ++i) {
+      values[i] = 3 * (packet_location + i);
+    }
+    return internal::pload<typename internal::packet_traits<int>::type>(values);
+  }
+};
+
+
+static void test_custom()
+{
+  Tensor<int, 1> vec(6);
+  vec.setRandom<MyGenerator>();
+
+  for (int i = 0; i < 6; ++i) {
+    VERIFY_IS_EQUAL(vec(i), 3*i);
+  }
+}
+
+void test_cxx11_tensor_random()
+{
+  CALL_SUBTEST(test_default());
+  CALL_SUBTEST(test_normal());
+  CALL_SUBTEST(test_custom());
+}
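
For reviewers, a minimal usage sketch of what this patch enables, not part of the diff itself. It assumes the TensorBase convenience methods sum() and mean() that forward to the SumReducer/MeanReducer functors above, and the default setRandom() path backed by internal::UniformRandomGenerator; those entry points live outside the files touched here. When the reduced dimensions are statically known to be the innermost ones, the evaluator takes the vectorized InnerMostDimReducer<Self, Op, true> branch added above; otherwise it falls back to GenericDimReducer.

// Sketch only: assumes TensorBase::sum()/mean() wrappers exist for these reducers.
#include <Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 3> t(2, 3, 4);
  t.setRandom();  // filled through the uniform random generator by default

  // Reduce away dimensions 0 and 1; the remaining dimension has size 4.
  Eigen::array<Eigen::DenseIndex, 2> dims;
  dims[0] = 0;
  dims[1] = 1;
  Eigen::Tensor<float, 1> sums = t.sum(dims);
  Eigen::Tensor<float, 1> means = t.mean(dims);  // MeanReducer: same sum divided by 2*3

  for (int i = 0; i < 4; ++i) {
    std::cout << sums(i) << " " << means(i) << std::endl;
  }
  return 0;
}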