From dc03b8f3a16571a6e776d44529d3e69851db7f97 Mon Sep 17 00:00:00 2001 From: Igor Babuschkin Date: Fri, 3 Jun 2016 17:37:04 +0100 Subject: [PATCH 01/85] Add generic scan method --- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 12f8a1499..eafd6f6f1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -498,6 +498,13 @@ class TensorBase return TensorScanProdOp(derived(), axis); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanOp + scan(const Index& axis, const Reducer& reducer) const { + return TensorScanOp(derived(), axis, reducer); + } + // Reductions. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorReductionOp, const Dims, const Derived> From c2a102345f627e4cd1908dad03e6ef0cbb2170c0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 3 Jun 2016 17:27:08 -0700 Subject: [PATCH 02/85] Improved the performance of full reductions. AFTER: BM_fullReduction/10 4541 4543 154017 21.0M items/s BM_fullReduction/64 5191 5193 100000 752.5M items/s BM_fullReduction/512 9588 9588 71361 25.5G items/s BM_fullReduction/4k 244314 244281 2863 64.0G items/s BM_fullReduction/5k 359382 359363 1946 64.8G items/s BEFORE: BM_fullReduction/10 9085 9087 74395 10.5M items/s BM_fullReduction/64 9478 9478 72014 412.1M items/s BM_fullReduction/512 14643 14646 46902 16.7G items/s BM_fullReduction/4k 260338 260384 2678 60.0G items/s BM_fullReduction/5k 385076 385178 1818 60.5G items/s --- .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 37 +++++++++++-- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 4 +- .../CXX11/src/Tensor/TensorReductionCuda.h | 52 +++++++++++++++---- 3 files changed, 76 insertions(+), 17 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 6c12b2ed8..1468caa23 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -12,6 +12,8 @@ namespace Eigen { +static const int kCudaScratchSize = 1024; + // This defines an interface that GPUDevice can take to use // CUDA streams underneath. class StreamInterface { @@ -27,6 +29,12 @@ class StreamInterface { // Return a scratchpad buffer of size 1k virtual void* scratchpad() const = 0; + + // Return a semaphore. The semaphore is initially initialized to 0, and + // each kernel using it is responsible for resetting to 0 upon completion + // to maintain the invariant that the semaphore is always equal to 0 upon + // each kernel start. + virtual unsigned int* semaphore() const = 0; }; static cudaDeviceProp* m_deviceProperties; @@ -65,12 +73,12 @@ static const cudaStream_t default_stream = cudaStreamDefault; class CudaStreamDevice : public StreamInterface { public: // Use the default stream on the current device - CudaStreamDevice() : stream_(&default_stream), scratch_(NULL) { + CudaStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) { cudaGetDevice(&device_); initializeDeviceProp(); } // Use the default stream on the specified device - CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL) { + CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) { initializeDeviceProp(); } // Use the specified stream. 
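A minimal usage sketch of the generic scan() entry point added in PATCH 01 above; SumReducer is the existing reduction functor, and materializing the result assumes the TensorScanOp evaluator provided elsewhere in this series, so this is illustrative only and not part of the diff:

    // Illustrative only: cumulative sum along axis 1 expressed through the new
    // generic scan() method instead of cumsum().
    Eigen::Tensor<float, 2> t(3, 4);
    t.setRandom();
    Eigen::Tensor<float, 2> running_sum =
        t.scan(/*axis=*/1, Eigen::internal::SumReducer<float>());
    // Expected to produce the same values as t.cumsum(1).
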
Note that it's the @@ -78,7 +86,7 @@ class CudaStreamDevice : public StreamInterface { // the specified device. If no device is specified the code // assumes that the stream is associated to the current gpu device. CudaStreamDevice(const cudaStream_t* stream, int device = -1) - : stream_(stream), device_(device), scratch_(NULL) { + : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) { if (device < 0) { cudaGetDevice(&device_); } else { @@ -123,15 +131,27 @@ class CudaStreamDevice : public StreamInterface { virtual void* scratchpad() const { if (scratch_ == NULL) { - scratch_ = allocate(1024); + scratch_ = allocate(kCudaScratchSize + sizeof(unsigned int)); } return scratch_; } + virtual unsigned int* semaphore() const { + if (semaphore_ == NULL) { + char* scratch = static_cast(scratchpad()) + kCudaScratchSize; + semaphore_ = reinterpret_cast(scratch); + cudaError_t err = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + return semaphore_; + } + private: const cudaStream_t* stream_; int device_; mutable void* scratch_; + mutable unsigned int* semaphore_; }; struct GpuDevice { @@ -174,6 +194,15 @@ struct GpuDevice { #endif } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const { +#ifndef __CUDA_ARCH__ + return stream_->semaphore(); +#else + eigen_assert(false && "The default device should be used instead to generate kernel code"); + return NULL; +#endif + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { #ifndef __CUDA_ARCH__ cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 99a09c058..6ddf824e9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -316,7 +316,7 @@ struct OuterReducer { #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template -__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); +__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); #ifdef EIGEN_HAS_CUDA_FP16 @@ -616,7 +616,7 @@ struct TensorEvaluator, Device> template friend struct internal::FullReducerShard; #endif #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) - template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*); + template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); #ifdef EIGEN_HAS_CUDA_FP16 template friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); template friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 45087a9a4..0d1a098b7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -112,17 +112,40 @@ __global__ void ReductionInitKernel(const CoeffType val, Index num_preserved_coe } } + template __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs, - typename Self::CoeffReturnType* output) { + typename Self::CoeffReturnType* output, unsigned int* semaphore) { + // Initialize the output value const Index first_index = blockIdx.x * BlockSize * 
NumPerThread + threadIdx.x; - - // Initialize the output value if it wasn't initialized by the ReductionInitKernel - if (gridDim.x == 1 && first_index == 0) { - *output = reducer.initialize(); - __syncthreads(); + if (gridDim.x == 1) { + if (first_index == 0) { + *output = reducer.initialize(); + } } + else { + if (threadIdx.x == 0) { + unsigned int block = atomicCAS(semaphore, 0u, 1u); + if (block == 0) { + // We're the first block to run, initialize the output value + atomicExch(output, reducer.initialize()); + unsigned int old = atomicExch(semaphore, 2u); + assert(old == 1u); + } + else { + // Use atomicCAS here to ensure that the reads aren't cached + unsigned int val = atomicCAS(semaphore, 2u, 2u); + while (val < 2u) { + val = atomicCAS(semaphore, 2u, 2u); + } + } + } + } + + __syncthreads(); + + eigen_assert(gridDim.x == 1 || *semaphore >= 2u); typename Self::CoeffReturnType accum = reducer.initialize(); Index max_iter = numext::mini(num_coeffs - first_index, NumPerThread*BlockSize); @@ -141,6 +164,15 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num if ((threadIdx.x & (warpSize - 1)) == 0) { atomicReduce(output, accum, reducer); } + + if (gridDim.x > 1 && threadIdx.x == 0) { + unsigned int ticket = atomicInc(semaphore, UINT_MAX); + assert(ticket >= 2u); + if (ticket == gridDim.x + 1) { + // We're the last block, reset the semaphore + *semaphore = 0; + } + } } @@ -246,15 +278,13 @@ struct FullReductionLauncher { const int num_per_thread = 128; const int num_blocks = divup(num_coeffs, block_size * num_per_thread); + unsigned int* semaphore = NULL; if (num_blocks > 1) { - // We initialize the outputs outside the reduction kernel when we can't be sure that there - // won't be a race conditions between multiple thread blocks. 
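To make the control flow of the kernel above easier to follow, here is a self-contained sketch of the semaphore handshake each block performs before accumulating into the shared output; the helper name and the float specialization are illustrative, not part of the diff:

    // Semaphore states, given the StreamInterface invariant *semaphore == 0 at launch:
    //   0   -> no block has claimed the output yet
    //   1   -> the winning block is writing the initial value
    //   >=2 -> the output is initialized; blocks may atomically accumulate into it
    __device__ void wait_for_output_init(unsigned int* semaphore, float* output,
                                         float init_value) {
      if (threadIdx.x == 0) {
        if (atomicCAS(semaphore, 0u, 1u) == 0u) {
          atomicExch(output, init_value);   // we won the race: initialize the output
          atomicExch(semaphore, 2u);        // publish "initialized"
        } else {
          // Spin until the winner has published; atomicCAS doubles as an uncached read.
          while (atomicCAS(semaphore, 2u, 2u) < 2u) {
          }
        }
      }
      __syncthreads();
    }
    // After accumulation, the last block to finish bumps the semaphore with
    // atomicInc and resets it to 0, restoring the launch invariant.
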
- LAUNCH_CUDA_KERNEL((ReductionInitKernel), - 1, 32, 0, device, reducer.initialize(), 1, output); + semaphore = device.semaphore(); } LAUNCH_CUDA_KERNEL((FullReductionKernel), - num_blocks, block_size, 0, device, reducer, self, num_coeffs, output); + num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore); } }; From c21eaedce617de17666d5d92eef594eb10467cda Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sat, 4 Jun 2016 07:47:04 -0700 Subject: [PATCH 03/85] Use array_prod to compute the number of elements contained in the input tensor expression --- unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 6ddf824e9..04ba45a8f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -558,7 +558,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index + PacketSize - 1 < dimensions().TotalSize()); + eigen_assert(index + PacketSize - 1 < internal::array_prod(dimensions())); EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; if (ReducingInnerMostDims) { From 02db4e1a82e6059cc217d6aa57bcc5ac6342eb37 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sat, 4 Jun 2016 08:21:17 -0700 Subject: [PATCH 04/85] Disable the tensor tests when using msvc since older versions of the compiler fail to handle this code --- unsupported/test/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 2d65eb0cd..ff0ca75c3 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -112,6 +112,10 @@ ei_add_test(kronecker_product) # TODO: The following test names are prefixed with the cxx11 string, since historically # the tests depended on c++11. This isn't the case anymore so we ought to rename them. +# FIXME: Old versions of MSVC fail to compile this code, so we just disable these tests +# when using visual studio. We should make the check more strict to enable the tests for +# newer versions of MSVC. +if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") ei_add_test(cxx11_float16) ei_add_test(cxx11_tensor_dimension) ei_add_test(cxx11_tensor_map) @@ -130,7 +134,8 @@ ei_add_test(cxx11_tensor_io) if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") # This test requires __uint128_t which is only available on 64bit systems ei_add_test(cxx11_tensor_uint128) -endif() +endif() +endif() if(EIGEN_TEST_CXX11) # It should be safe to always run these tests as there is some fallback code for From 39baff850c2f4fe1fee3b7a3918ba62a526e4f08 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Thu, 2 Jun 2016 17:04:19 -0700 Subject: [PATCH 05/85] Add TernaryFunctors and the betainc SpecialFunction. TernaryFunctors and their executors allow operations on 3-tuples of inputs. API fully implemented for Arrays and Tensors based on binary functors. Ported the cephes betainc function (regularized incomplete beta integral) to Eigen, with support for CPU and GPU, floats, doubles, and half types. Added unit tests in array.cpp and cxx11_tensor_cuda.cu Collapsed revision * Merged helper methods for betainc across floats and doubles. * Added TensorGlobalFunctions with betainc(). 
Removed betainc() from TensorBase. * Clean up CwiseTernaryOp checks, change igamma_helper to cephes_helper. * betainc: merge incbcf and incbd into incbeta_cfe. and more cleanup. * Update TernaryOp and SpecialFunctions (betainc) based on review comments. --- Eigen/Core | 2 + Eigen/src/Core/CoreEvaluators.h | 99 ++++ Eigen/src/Core/CwiseTernaryOp.h | 212 ++++++++ Eigen/src/Core/GenericPacketMath.h | 5 + Eigen/src/Core/GlobalFunctions.h | 22 + Eigen/src/Core/SpecialFunctions.h | 512 +++++++++++++++++- Eigen/src/Core/arch/CUDA/Half.h | 3 + Eigen/src/Core/arch/CUDA/MathFunctions.h | 18 + Eigen/src/Core/arch/CUDA/PacketMath.h | 6 +- Eigen/src/Core/functors/BinaryFunctors.h | 2 +- Eigen/src/Core/functors/TernaryFunctors.h | 47 ++ Eigen/src/Core/util/ForwardDeclarations.h | 2 + Eigen/src/Core/util/StaticAssert.h | 4 +- test/array.cpp | 113 +++- unsupported/Eigen/CXX11/Tensor | 1 + .../Eigen/CXX11/src/Tensor/TensorBase.h | 1 - .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 81 +++ .../Eigen/CXX11/src/Tensor/TensorExpr.h | 96 ++++ .../src/Tensor/TensorForwardDeclarations.h | 1 + .../CXX11/src/Tensor/TensorGlobalFunctions.h | 33 ++ unsupported/test/cxx11_tensor_cuda.cu | 150 +++++ 21 files changed, 1389 insertions(+), 21 deletions(-) create mode 100644 Eigen/src/Core/CwiseTernaryOp.h create mode 100644 Eigen/src/Core/functors/TernaryFunctors.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h diff --git a/Eigen/Core b/Eigen/Core index 25cb87930..1f3a6504e 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -371,6 +371,7 @@ using std::ptrdiff_t; #include "src/Core/arch/Default/Settings.h" +#include "src/Core/functors/TernaryFunctors.h" #include "src/Core/functors/BinaryFunctors.h" #include "src/Core/functors/UnaryFunctors.h" #include "src/Core/functors/NullaryFunctors.h" @@ -403,6 +404,7 @@ using std::ptrdiff_t; #include "src/Core/PlainObjectBase.h" #include "src/Core/Matrix.h" #include "src/Core/Array.h" +#include "src/Core/CwiseTernaryOp.h" #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseNullaryOp.h" diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 5a5186a57..7ba92963c 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -41,10 +41,19 @@ template<> struct storage_kind_to_shape { typedef Transp // We currently distinguish the following kind of evaluators: // - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) // - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - ternary_evaluator for expression taking three arguments (CwiseTernaryOp) // - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. 
// - mapbase_evaluator for Map, Block, Ref // - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) +template< typename T, + typename Arg1Kind = typename evaluator_traits::Kind, + typename Arg2Kind = typename evaluator_traits::Kind, + typename Arg3Kind = typename evaluator_traits::Kind, + typename Arg1Scalar = typename traits::Scalar, + typename Arg2Scalar = typename traits::Scalar, + typename Arg3Scalar = typename traits::Scalar> struct ternary_evaluator; + template< typename T, typename LhsKind = typename evaluator_traits::Kind, typename RhsKind = typename evaluator_traits::Kind, @@ -442,6 +451,96 @@ protected: evaluator m_argImpl; }; +// -------------------- CwiseTernaryOp -------------------- + +// this is a ternary expression +template +struct evaluator > + : public ternary_evaluator > +{ + typedef CwiseTernaryOp XprType; + typedef ternary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct ternary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseTernaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + Arg1Flags = evaluator::Flags, + Arg2Flags = evaluator::Flags, + Arg3Flags = evaluator::Flags, + SameType = is_same::value && is_same::value, + StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit), + Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & ( + HereditaryBits + | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? 
PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN( + EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment), + evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_arg1Impl.template packet(row, col), + m_arg2Impl.template packet(row, col), + m_arg3Impl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + +protected: + const TernaryOp m_functor; + evaluator m_arg1Impl; + evaluator m_arg2Impl; + evaluator m_arg3Impl; +}; + // -------------------- CwiseBinaryOp -------------------- // this is a binary expression diff --git a/Eigen/src/Core/CwiseTernaryOp.h b/Eigen/src/Core/CwiseTernaryOp.h new file mode 100644 index 000000000..fe71c07cf --- /dev/null +++ b/Eigen/src/Core/CwiseTernaryOp.h @@ -0,0 +1,212 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_TERNARY_OP_H +#define EIGEN_CWISE_TERNARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > { + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef typename remove_all::type Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Arg1, Arg2, and Arg3 to have the same scalar type + // (see CwiseTernaryOp constructor), + // we still want to handle the case when the result type is different. 
+ typedef typename result_of::type Scalar; + EIGEN_STATIC_ASSERT( + (internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT( + (internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT( + (internal::is_same::StorageIndex, + typename internal::traits::StorageIndex>::value), + STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT( + (internal::is_same::StorageIndex, + typename internal::traits::StorageIndex>::value), + STORAGE_INDEX_MUST_MATCH) + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + typedef typename Arg1::Nested Arg1Nested; + typedef typename Arg2::Nested Arg2Nested; + typedef typename Arg3::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + enum { Flags = _Arg1Nested::Flags & RowMajorBit }; +}; +} // end namespace internal + +template +class CwiseTernaryOpImpl; + +/** \class CwiseTernaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise ternary operator is + * applied to two expressions + * + * \tparam TernaryOp template functor implementing the operator + * \tparam Arg1Type the type of the first argument + * \tparam Arg2Type the type of the second argument + * \tparam Arg3Type the type of the third argument + * + * This class represents an expression where a coefficient-wise ternary + * operator is applied to three expressions. + * It is the return type of ternary operators, by which we mean only those + * ternary operators where + * all three arguments are Eigen expressions. + * For example, the return type of betainc(matrix1, matrix2, matrix3) is a + * CwiseTernaryOp. + * + * Most of the time, this is the only way that it is used, so you typically + * don't have to name + * CwiseTernaryOp types explicitly. 
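A short sketch of how such a ternary expression is typically produced and consumed, using the array-level betainc() introduced later in this patch; the constants are chosen only to keep the arguments inside the function's domain:

    // The expression is built by the free function and collapses into a plain
    // Array on assignment; its CwiseTernaryOp type never needs to be spelled out.
    Eigen::ArrayXd a = Eigen::ArrayXd::Constant(4, 2.0);
    Eigen::ArrayXd b = Eigen::ArrayXd::Constant(4, 3.0);
    Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(4, 0.1, 0.9);
    Eigen::ArrayXd result = Eigen::betainc(a, b, x);  // coefficient-wise I_x(a, b)
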
+ * + * \sa MatrixBase::ternaryExpr(const MatrixBase &, const + * MatrixBase &, const CustomTernaryOp &) const, class CwiseBinaryOp, + * class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseTernaryOp : public CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>, + internal::no_assignment_operator { + EIGEN_STATIC_ASSERT( + (internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT( + (internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + + public: + typedef typename internal::remove_all::type Arg1; + typedef typename internal::remove_all::type Arg2; + typedef typename internal::remove_all::type Arg3; + + typedef typename CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp) + + typedef typename internal::ref_selector::type Arg1Nested; + typedef typename internal::ref_selector::type Arg2Nested; + typedef typename internal::ref_selector::type Arg3Nested; + typedef typename internal::remove_reference::type _Arg1Nested; + typedef typename internal::remove_reference::type _Arg2Nested; + typedef typename internal::remove_reference::type _Arg3Nested; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2, + const Arg3& a3, + const TernaryOp& func = TernaryOp()) + : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) { + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && + a1.rows() == a3.rows() && a1.cols() == a3.cols()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg3.rows(); + else if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg2.rows(); + else + return m_arg1.rows(); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg3.cols(); + else if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg2.cols(); + else + return m_arg1.cols(); + } + + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg1Nested& arg1() const { return m_arg1; } + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg2Nested& arg2() const { return m_arg2; } + /** \returns the third argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg3Nested& arg3() const { return m_arg3; } + /** \returns the functor representing the ternary operation */ + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + protected: + Arg1Nested m_arg1; + Arg2Nested m_arg2; + Arg3Nested m_arg3; + const TernaryOp m_functor; +}; + +// Generic API dispatcher +template 
+class CwiseTernaryOpImpl + : public internal::generic_xpr_base< + CwiseTernaryOp >::type { + public: + typedef typename internal::generic_xpr_base< + CwiseTernaryOp >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_TERNARY_OP_H diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 82fabbb70..76a75dee1 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -83,6 +83,7 @@ struct default_packet_traits HasErfc = 0, HasIGamma = 0, HasIGammac = 0, + HasBetaInc = 0, HasRound = 0, HasFloor = 0, @@ -466,6 +467,10 @@ Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); } +/** \internal \returns the complementary incomplete gamma function betainc(\a a, \a b, \a x) */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext::betainc; return betainc(a, b, x); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 9c97ccb0e..e489cefec 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -213,6 +213,28 @@ namespace Eigen ); } + /** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given arrays. + * + * This function computes the regularized incomplete beta function (integral). + * + * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types, + * or float/double in non c++11 mode, the user has to provide implementations of betainc(T,T,T) for any scalar + * type T to be supported. + * + * \sa Eigen::betainc(), Eigen::lgamma() + */ + template + inline const Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived> + betainc(const Eigen::ArrayBase& a, const Eigen::ArrayBase& b, const Eigen::ArrayBase& x) + { + return Eigen::CwiseTernaryOp, const ArgADerived, const ArgBDerived, const ArgXDerived>( + a.derived(), + b.derived(), + x.derived() + ); + } + + /** \returns an expression of the coefficient-wise zeta(\a x, \a q) to the given arrays. 
* * It returns the Riemann zeta function of two arguments \a x and \a q: diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index f34c7bcda..0dd9a1dc3 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -392,17 +392,19 @@ struct igammac_retval { typedef Scalar type; }; -// NOTE: igamma_helper is also used to implement zeta +// NOTE: cephes_helper is also used to implement zeta template -struct igamma_helper { +struct cephes_helper { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 0.0; } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar biginv() { assert(false && "biginv not supported for this type"); return 0.0; } }; template <> -struct igamma_helper { +struct cephes_helper { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float machep() { return NumTraits::epsilon() / 2; // 1.0 - machep == 1.0 @@ -412,10 +414,15 @@ struct igamma_helper { // use epsneg (1.0 - epsneg == 1.0) return 1.0f / (NumTraits::epsilon() / 2); } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float biginv() { + // epsneg + return machep(); + } }; template <> -struct igamma_helper { +struct cephes_helper { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double machep() { return NumTraits::epsilon() / 2; // 1.0 - machep == 1.0 @@ -424,6 +431,11 @@ struct igamma_helper { static EIGEN_STRONG_INLINE double big() { return 1.0 / NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double biginv() { + // inverse of eps + return NumTraits::epsilon(); + } }; #if !EIGEN_HAS_C99_MATH @@ -538,10 +550,10 @@ struct igammac_impl { const Scalar zero = 0; const Scalar one = 1; const Scalar two = 2; - const Scalar machep = igamma_helper::machep(); + const Scalar machep = cephes_helper::machep(); const Scalar maxlog = numext::log(NumTraits::highest()); - const Scalar big = igamma_helper::big(); - const Scalar biginv = 1 / big; + const Scalar big = cephes_helper::big(); + const Scalar biginv = cephes_helper::biginv(); const Scalar inf = NumTraits::infinity(); Scalar ans, ax, c, yc, r, t, y, z; @@ -590,7 +602,9 @@ struct igammac_impl { qkm2 *= biginv; qkm1 *= biginv; } - if (t <= machep) break; + if (t <= machep) { + break; + } } return (ans * ax); @@ -724,7 +738,7 @@ struct igamma_impl { EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) { const Scalar zero = 0; const Scalar one = 1; - const Scalar machep = igamma_helper::machep(); + const Scalar machep = cephes_helper::machep(); const Scalar maxlog = numext::log(NumTraits::highest()); Scalar ans, ax, c, r; @@ -746,7 +760,9 @@ struct igamma_impl { r += one; c *= x/r; ans += c; - if (c/ans <= machep) break; + if (c/ans <= machep) { + break; + } } return (ans * ax / a); @@ -899,7 +915,7 @@ struct zeta_impl { const Scalar maxnum = NumTraits::infinity(); const Scalar zero = 0.0, half = 0.5, one = 1.0; - const Scalar machep = igamma_helper::machep(); + const Scalar machep = cephes_helper::machep(); const Scalar nan = NumTraits::quiet_NaN(); if( x == one ) @@ -947,8 +963,9 @@ struct zeta_impl { t = a*b/A[i]; s = s + t; t = numext::abs(t/s); - if( t < machep ) - return s; + if( t < machep ) { + break; + } k += one; a *= x + k; b /= w; @@ -1007,6 +1024,467 @@ struct polygamma_impl { #endif // EIGEN_HAS_C99_MATH 
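Before the implementation below, a tiny numerical sanity check of the scalar entry point; the expected values follow directly from the definition of the regularized incomplete beta integral, and the snippet assumes a C99/C++11 build as required by the header:

    #include <Eigen/Core>
    #include <cassert>
    #include <cmath>

    void betainc_smoke_test() {
      // I_x(1, 1) = x, since the integrand is constant for a = b = 1.
      assert(std::abs(Eigen::numext::betainc(1.0, 1.0, 0.25) - 0.25) < 1e-9);
      // Beta(2, 2) is symmetric about 0.5, so I_0.5(2, 2) = 0.5.
      assert(std::abs(Eigen::numext::betainc(2.0, 2.0, 0.5) - 0.5) < 1e-9);
    }
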
+/************************************************************************************************ + * Implementation of betainc (incomplete beta integral), based on Cephes but requires C++11/C99 * + ************************************************************************************************/ + +template +struct betainc_retval { + typedef Scalar type; +}; + +#if !EIGEN_HAS_C99_MATH + +template +struct betainc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) { + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +#else + +template +struct betainc_impl { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) { + /* betaincf.c + * + * Incomplete beta integral + * + * + * SYNOPSIS: + * + * float a, b, x, y, betaincf(); + * + * y = betaincf( a, b, x ); + * + * + * DESCRIPTION: + * + * Returns incomplete beta integral of the arguments, evaluated + * from zero to x. The function is defined as + * + * x + * - - + * | (a+b) | | a-1 b-1 + * ----------- | t (1-t) dt. + * - - | | + * | (a) | (b) - + * 0 + * + * The domain of definition is 0 <= x <= 1. In this + * implementation a and b are restricted to positive values. + * The integral from x to 1 may be obtained by the symmetry + * relation + * + * 1 - betainc( a, b, x ) = betainc( b, a, 1-x ). + * + * The integral is evaluated by a continued fraction expansion. + * If a < 1, the function calls itself recursively after a + * transformation to increase a to a+1. + * + * ACCURACY (float): + * + * Tested at random points (a,b,x) with a and b in the indicated + * interval and x between 0 and 1. + * + * arithmetic domain # trials peak rms + * Relative error: + * IEEE 0,30 10000 3.7e-5 5.1e-6 + * IEEE 0,100 10000 1.7e-4 2.5e-5 + * The useful domain for relative error is limited by underflow + * of the single precision exponential function. + * Absolute error: + * IEEE 0,30 100000 2.2e-5 9.6e-7 + * IEEE 0,100 10000 6.5e-5 3.7e-6 + * + * Larger errors may occur for extreme ratios of a and b. + * + * ACCURACY (double): + * arithmetic domain # trials peak rms + * IEEE 0,5 10000 6.9e-15 4.5e-16 + * IEEE 0,85 250000 2.2e-13 1.7e-14 + * IEEE 0,1000 30000 5.3e-12 6.3e-13 + * IEEE 0,10000 250000 9.3e-11 7.1e-12 + * IEEE 0,100000 10000 8.7e-10 4.8e-11 + * Outputs smaller than the IEEE gradual underflow threshold + * were excluded from these statistics. + * + * ERROR MESSAGES: + * message condition value returned + * incbet domain x<0, x>1 nan + * incbet underflow nan + */ + + EIGEN_STATIC_ASSERT((internal::is_same::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + +/* Continued fraction expansion #1 for incomplete beta integral (small_branch = True) + * Continued fraction expansion #2 for incomplete beta integral (small_branch = False) + */ +template +struct incbeta_cfe { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x, bool small_branch) { + EIGEN_STATIC_ASSERT((internal::is_same::value || + internal::is_same::value), + THIS_TYPE_IS_NOT_SUPPORTED); + const Scalar big = cephes_helper::big(); + const Scalar machep = cephes_helper::machep(); + const Scalar biginv = cephes_helper::biginv(); + + const Scalar zero = 0; + const Scalar one = 1; + const Scalar two = 2; + + Scalar xk, pk, pkm1, pkm2, qk, qkm1, qkm2; + Scalar k1, k2, k3, k4, k5, k6, k7, k8, k26update; + Scalar ans; + int n; + + const int num_iters = (internal::is_same::value) ? 
100 : 300; + const Scalar thresh = + (internal::is_same::value) ? machep : Scalar(3) * machep; + Scalar r = (internal::is_same::value) ? zero : one; + + if (small_branch) { + k1 = a; + k2 = a + b; + k3 = a; + k4 = a + one; + k5 = one; + k6 = b - one; + k7 = k4; + k8 = a + two; + k26update = one; + } else { + k1 = a; + k2 = b - one; + k3 = a; + k4 = a + one; + k5 = one; + k6 = a + b; + k7 = a + one; + k8 = a + two; + k26update = -one; + x = x / (one - x); + } + + pkm2 = zero; + qkm2 = one; + pkm1 = one; + qkm1 = one; + ans = one; + n = 0; + + do { + xk = -(x * k1 * k2) / (k3 * k4); + pk = pkm1 + pkm2 * xk; + qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + xk = (x * k5 * k6) / (k7 * k8); + pk = pkm1 + pkm2 * xk; + qk = qkm1 + qkm2 * xk; + pkm2 = pkm1; + pkm1 = pk; + qkm2 = qkm1; + qkm1 = qk; + + if (qk != zero) { + r = pk / qk; + if (numext::abs(ans - r) < numext::abs(r) * thresh) { + return r; + } + ans = r; + } + + k1 += one; + k2 += k26update; + k3 += two; + k4 += two; + k5 += one; + k6 -= k26update; + k7 += two; + k8 += two; + + if ((numext::abs(qk) + numext::abs(pk)) > big) { + pkm2 *= biginv; + pkm1 *= biginv; + qkm2 *= biginv; + qkm1 *= biginv; + } + if ((numext::abs(qk) < biginv) || (numext::abs(pk) < biginv)) { + pkm2 *= big; + pkm1 *= big; + qkm2 *= big; + qkm1 *= big; + } + } while (++n < num_iters); + + return ans; + } +}; + +/* Helper functions depending on the Scalar type */ +template +struct betainc_helper {}; + +template <> +struct betainc_helper { + /* Core implementation, assumes a large (> 1.0) */ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float incbsa(float aa, float bb, + float xx) { + float ans, a, b, t, x, onemx; + bool reversed_a_b = false; + + onemx = 1.0f - xx; + + /* see if x is greater than the mean */ + if (xx > (aa / (aa + bb))) { + reversed_a_b = true; + a = bb; + b = aa; + t = xx; + x = onemx; + } else { + a = aa; + b = bb; + t = onemx; + x = xx; + } + + /* Choose expansion for optimal convergence */ + if (b > 10.0f) { + if (numext::abs(b * x / a) < 0.3f) { + t = betainc_helper::incbps(a, b, x); + if (reversed_a_b) t = 1.0f - t; + return t; + } + } + + ans = x * (a + b - 2.0f) / (a - 1.0f); + if (ans < 1.0f) { + ans = incbeta_cfe::run(a, b, x, true /* small_branch */); + t = b * numext::log(t); + } else { + ans = incbeta_cfe::run(a, b, x, false /* small_branch */); + t = (b - 1.0f) * numext::log(t); + } + + t += a * numext::log(x) + lgamma_impl::run(a + b) - + lgamma_impl::run(a) - lgamma_impl::run(b); + t += numext::log(ans / a); + t = numext::exp(t); + + if (reversed_a_b) t = 1.0f - t; + return t; + } + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float incbps(float a, float b, float x) { + float t, u, y, s; + const float machep = cephes_helper::machep(); + + y = a * numext::log(x) + (b - 1.0f) * numext::log1p(-x) - numext::log(a); + y -= lgamma_impl::run(a) + lgamma_impl::run(b); + y += lgamma_impl::run(a + b); + + t = x / (1.0f - x); + s = 0.0f; + u = 1.0f; + do { + b -= 1.0f; + if (b == 0.0f) { + break; + } + a += 1.0f; + u *= t * b / a; + s += u; + } while (numext::abs(u) > machep); + + return numext::exp(y) * (1.0f + s); + } +}; + +template <> +struct betainc_impl { + EIGEN_DEVICE_FUNC + static float run(float a, float b, float x) { + const float nan = NumTraits::quiet_NaN(); + float ans, t; + + if (a <= 0.0f) return nan; + if (b <= 0.0f) return nan; + if ((x <= 0.0f) || (x >= 1.0f)) { + if (x == 0.0f) return 0.0f; + if (x == 1.0f) return 1.0f; + // mtherr("betaincf", DOMAIN); + return nan; + } + + /* 
transformation for small aa */ + if (a <= 1.0f) { + ans = betainc_helper::incbsa(a + 1.0f, b, x); + t = a * numext::log(x) + b * numext::log1p(-x) + + lgamma_impl::run(a + b) - lgamma_impl::run(a + 1.0f) - + lgamma_impl::run(b); + return (ans + numext::exp(t)); + } else { + return betainc_helper::incbsa(a, b, x); + } + } +}; + +template <> +struct betainc_helper { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double incbps(double a, double b, double x) { + const double machep = cephes_helper::machep(); + + double s, t, u, v, n, t1, z, ai; + + ai = 1.0 / a; + u = (1.0 - b) * x; + v = u / (a + 1.0); + t1 = v; + t = u; + n = 2.0; + s = 0.0; + z = machep * ai; + while (numext::abs(v) > z) { + u = (n - b) * x / n; + t *= u; + v = t / (a + n); + s += v; + n += 1.0; + } + s += t1; + s += ai; + + u = a * numext::log(x); + // TODO: gamma() is not directly implemented in Eigen. + /* + if ((a + b) < maxgam && numext::abs(u) < maxlog) { + t = gamma(a + b) / (gamma(a) * gamma(b)); + s = s * t * pow(x, a); + } else { + */ + t = lgamma_impl::run(a + b) - lgamma_impl::run(a) - + lgamma_impl::run(b) + u + numext::log(s); + return s = exp(t); + } +}; + +template <> +struct betainc_impl { + EIGEN_DEVICE_FUNC + static double run(double aa, double bb, double xx) { + const double nan = NumTraits::quiet_NaN(); + const double machep = cephes_helper::machep(); + // const double maxgam = 171.624376956302725; + + double a, b, t, x, xc, w, y; + bool reversed_a_b = false; + + if (aa <= 0.0 || bb <= 0.0) { + return nan; // goto domerr; + } + + if ((xx <= 0.0) || (xx >= 1.0)) { + if (xx == 0.0) return (0.0); + if (xx == 1.0) return (1.0); + // mtherr("incbet", DOMAIN); + return nan; + } + + if ((bb * xx) <= 1.0 && xx <= 0.95) { + return betainc_helper::incbps(aa, bb, xx); + } + + w = 1.0 - xx; + + /* Reverse a and b if x is greater than the mean. */ + if (xx > (aa / (aa + bb))) { + reversed_a_b = true; + a = bb; + b = aa; + xc = xx; + x = w; + } else { + a = aa; + b = bb; + xc = w; + x = xx; + } + + if (reversed_a_b && (b * x) <= 1.0 && x <= 0.95) { + t = betainc_helper::incbps(a, b, x); + if (t <= machep) { + t = 1.0 - machep; + } else { + t = 1.0 - t; + } + return t; + } + + /* Choose expansion for better convergence. */ + y = x * (a + b - 2.0) - (a - 1.0); + if (y < 0.0) { + w = incbeta_cfe::run(a, b, x, true /* small_branch */); + } else { + w = incbeta_cfe::run(a, b, x, false /* small_branch */) / xc; + } + + /* Multiply w by the factor + a b _ _ _ + x (1-x) | (a+b) / ( a | (a) | (b) ) . */ + + y = a * numext::log(x); + t = b * numext::log(xc); + // TODO: gamma is not directly implemented in Eigen. + /* + if ((a + b) < maxgam && numext::abs(y) < maxlog && numext::abs(t) < maxlog) + { + t = pow(xc, b); + t *= pow(x, a); + t /= a; + t *= w; + t *= gamma(a + b) / (gamma(a) * gamma(b)); + } else { + */ + /* Resort to logarithms. 
*/ + y += t + lgamma_impl::run(a + b) - lgamma_impl::run(a) - + lgamma_impl::run(b); + y += numext::log(w / a); + t = numext::exp(y); + + /* } */ + // done: + + if (reversed_a_b) { + if (t <= machep) { + t = 1.0 - machep; + } else { + t = 1.0 - t; + } + } + return t; + } +}; + +#endif // EIGEN_HAS_C99_MATH + } // end namespace internal namespace numext { @@ -1022,7 +1500,7 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar) digamma(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x); } - + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(zeta, Scalar) zeta(const Scalar& x, const Scalar& q) { @@ -1059,6 +1537,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar) return EIGEN_MATHFUNC_IMPL(igammac, Scalar)::run(a, x); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar) + betainc(const Scalar& a, const Scalar& b, const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x); +} + } // end namespace numext diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index d4ce2eaf9..87bdbfd1e 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -480,6 +480,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen: template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) { return Eigen::half(Eigen::numext::igammac(static_cast(a), static_cast(x))); } +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) { + return Eigen::half(Eigen::numext::betainc(static_cast(a), static_cast(b), static_cast(x))); +} #endif } // end namespace numext diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index c90ec96a0..8b5e8204f 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -181,6 +181,24 @@ double2 pigammac(const double2& a, const double2& x) return make_double2(igammac(a.x, x.x), igammac(a.y, x.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pbetainc(const float4& a, const float4& b, const float4& x) +{ + using numext::betainc; + return make_float4( + betainc(a.x, b.x, x.x), + betainc(a.y, b.y, x.y), + betainc(a.z, b.z, x.z), + betainc(a.w, b.w, x.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pbetainc(const double2& a, const double2& b, const double2& x) +{ + using numext::betainc; + return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y)); +} + #endif } // end namespace internal diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 25a59066c..ad66399e0 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -44,8 +44,9 @@ template<> struct packet_traits : default_packet_traits HasPolygamma = 1, HasErf = 1, HasErfc = 1, - HasIgamma = 1, + HasIGamma = 1, HasIGammac = 1, + HasBetaInc = 1, HasBlend = 0, }; @@ -68,10 +69,13 @@ template<> struct packet_traits : default_packet_traits HasRsqrt = 1, HasLGamma = 1, HasDiGamma = 1, + HasZeta = 1, + HasPolygamma = 1, HasErf = 1, HasErfc = 1, HasIGamma = 1, HasIGammac = 1, + HasBetaInc = 1, HasBlend = 0, }; diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 5cd8ca950..6eb5b91ce 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ 
-372,7 +372,7 @@ template struct scalar_igamma_op { } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const { - return internal::pigammac(a, x); + return internal::pigamma(a, x); } }; template diff --git a/Eigen/src/Core/functors/TernaryFunctors.h b/Eigen/src/Core/functors/TernaryFunctors.h new file mode 100644 index 000000000..8b9e53062 --- /dev/null +++ b/Eigen/src/Core/functors/TernaryFunctors.h @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TERNARY_FUNCTORS_H +#define EIGEN_TERNARY_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +//---------- associative ternary functors ---------- + +/** \internal + * \brief Template functor to compute the incomplete beta integral betainc(a, b, x) + * + */ +template struct scalar_betainc_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_betainc_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& a, const Scalar& b) const { + using numext::betainc; return betainc(x, a, b); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& x, const Packet& a, const Packet& b) const + { + return internal::pbetainc(x, a, b); + } +}; +template +struct functor_traits > { + enum { + // Guesstimate + Cost = 400 * NumTraits::MulCost + 400 * NumTraits::AddCost, + PacketAccess = packet_traits::HasBetaInc + }; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TERNARY_FUNCTORS_H diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index a102e5457..42e2e75b9 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -91,6 +91,7 @@ template class CwiseNullaryOp; template class CwiseUnaryOp; template class CwiseUnaryView; template class CwiseBinaryOp; +template class CwiseTernaryOp; template class Solve; template class Inverse; @@ -208,6 +209,7 @@ template struct scalar_identity_op; template struct scalar_sign_op; template struct scalar_igamma_op; template struct scalar_igammac_op; +template struct scalar_betainc_op; template struct scalar_product_op; template struct scalar_multiple2_op; diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index 6faaf889a..bee80ca38 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h @@ -98,7 +98,9 @@ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, - THIS_TYPE_IS_NOT_SUPPORTED + THIS_TYPE_IS_NOT_SUPPORTED, + STORAGE_KIND_MUST_MATCH, + STORAGE_INDEX_MUST_MATCH }; }; diff --git a/test/array.cpp b/test/array.cpp index 39a7b856f..8ed1269c2 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -592,16 +592,123 @@ template void array_special_functions() ref << 0.644934066848, 0.394934066848, 0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927; CALL_SUBTEST( verify_component_wise(ref, ref); ); - if(sizeof(RealScalar)>=64) { -// 
CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); ); + if(sizeof(RealScalar)>=8) { // double + // Reason for commented line: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232 + // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); ); CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res, ref); ); } else { -// CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); ); + // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); ); CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res.head(8), ref.head(8)); ); } } #endif + +#if EIGEN_HAS_C99_MATH + { + // Inputs and ground truth generated with scipy via: + // a = np.logspace(-3, 3, 5) - 1e-3 + // b = np.logspace(-3, 3, 5) - 1e-3 + // x = np.linspace(-0.1, 1.1, 5) + // (full_a, full_b, full_x) = np.vectorize(lambda a, b, x: (a, b, x))(*np.ix_(a, b, x)) + // full_a = full_a.flatten().tolist() # same for full_b, full_x + // v = scipy.special.betainc(full_a, full_b, full_x).flatten().tolist() + // + // Note in Eigen, we call betainc with arguments in the order (x, a, b). + ArrayType a(125); + ArrayType b(125); + ArrayType x(125); + ArrayType v(125); + ArrayType res(125); + + a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999; + + b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, + 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, + 0.999, 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 
0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999; + + x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, + -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1; + + v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan, + 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan, + 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan, + 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, + nan, nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256, + 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001, + 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403, + 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999, + 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan, + 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, + nan, nan, 7.864342668429763e-23, 3.015969667594166e-10, + 0.0008598571564165444, nan, nan, 6.031987710123844e-08, + 0.5000000000000007, 0.9999999396801229, nan, nan, 0.9999999999999999, + 0.9999999999999999, 0.9999999999999999, nan, nan, nan, nan, nan, nan, + nan, 0.0, 7.029920380986636e-306, 2.2450728208591345e-101, nan, nan, + 0.0, 9.275871147869727e-302, 1.2232913026152827e-97, nan, nan, 0.0, + 3.0891393081932924e-252, 2.9303043666183996e-60, nan, nan, + 2.248913486879199e-196, 0.5000000000004947, 0.9999999999999999, nan; + + CALL_SUBTEST(res = betainc(a, b, x); + verify_component_wise(res, v);); + } +#endif } void test_array() diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 859147404..79bac2f67 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -80,6 +80,7 @@ typedef unsigned __int64 uint64_t; #include "src/Tensor/TensorTraits.h" #include "src/Tensor/TensorUInt128.h" #include "src/Tensor/TensorIntDiv.h" +#include "src/Tensor/TensorGlobalFunctions.h" #include "src/Tensor/TensorBase.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index eafd6f6f1..8f3580ba7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -307,7 +307,6 @@ class TensorBase return unaryExpr(internal::scalar_floor_op()); } - // Generic binary operation support. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseBinaryOp diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 31b361c83..4e873011e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -403,6 +403,87 @@ struct TensorEvaluator m_rightImpl; }; +// -------------------- CwiseTernaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseTernaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_arg1Impl(op.arg1Expression(), device), + m_arg2Impl(op.arg2Expression(), device), + m_arg3Impl(op.arg3Expression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use arg2 or arg3 dimensions if they are known at compile time. 
+ return m_arg1Impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_arg1Impl.evalSubExprsIfNeeded(NULL); + m_arg2Impl.evalSubExprsIfNeeded(NULL); + m_arg3Impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_arg1Impl.cleanup(); + m_arg2Impl.cleanup(); + m_arg3Impl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_arg1Impl.costPerCoeff(vectorized) + + m_arg2Impl.costPerCoeff(vectorized) + + m_arg3Impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + private: + const TernaryOp m_functor; + TensorEvaluator m_arg1Impl; + TensorEvaluator m_arg2Impl; + TensorEvaluator m_arg3Impl; +}; + // -------------------- SelectOp -------------------- diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h index ea250d8bc..9509f8002 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -218,6 +218,102 @@ class TensorCwiseBinaryOp : public TensorBase +struct traits > +{ + // Type promotion to handle the case where the types of the args are different. 
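+  // The expression's Scalar is whatever the ternary functor returns when invoked on the three argument scalar types (computed via result_of below).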
+ typedef typename result_of< + TernaryOp(typename Arg1XprType::Scalar, + typename Arg2XprType::Scalar, + typename Arg3XprType::Scalar)>::type Scalar; + EIGEN_STATIC_ASSERT( + (internal::is_same::StorageKind, + typename traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT( + (internal::is_same::StorageKind, + typename traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT( + (internal::is_same::Index, + typename traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT( + (internal::is_same::Index, + typename traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename Arg1XprType::Nested Arg1Nested; + typedef typename Arg2XprType::Nested Arg2Nested; + typedef typename Arg3XprType::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseTernaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseTernaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseTernaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp()) + : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg1Expression() const { return m_arg1_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg2Expression() const { return m_arg2_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg3Expression() const { return m_arg3_xpr; } + + protected: + typename Arg1XprType::Nested m_arg1_xpr; + typename Arg1XprType::Nested m_arg2_xpr; + typename Arg3XprType::Nested m_arg3_xpr; + const TernaryOp m_functor; +}; + + namespace internal { template struct traits > diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index a1a18d938..f35275ffb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -21,6 +21,7 @@ template: template class TensorCwiseNullaryOp; template class TensorCwiseUnaryOp; template class TensorCwiseBinaryOp; +template class TensorCwiseTernaryOp; template class TensorSelectOp; template class TensorReductionOp; template class TensorIndexTupleOp; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h new file mode 100644 index 
000000000..665b861cf --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H + +namespace Eigen { + +/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors. + * + * This function computes the regularized incomplete beta function (integral). + * + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const + TensorCwiseTernaryOp, + const ADerived, const BDerived, const XDerived> + betainc(const ADerived& a, const BDerived& b, const XDerived& x) { + return TensorCwiseTernaryOp< + internal::scalar_betainc_op, const ADerived, + const BDerived, const XDerived>( + a, b, x, internal::scalar_betainc_op()); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 4026f48f0..284b46803 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -1019,6 +1019,153 @@ void test_cuda_erfc(const Scalar stddev) cudaFree(d_out); } +template +void test_cuda_betainc() +{ + Tensor in_x(125); + Tensor in_a(125); + Tensor in_b(125); + Tensor out(125); + Tensor expected_out(125); + out.setZero(); + + Scalar nan = std::numeric_limits::quiet_NaN(); + + Array x(125); + Array a(125); + Array b(125); + Array v(125); + + a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999; + + b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 
0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999; + + x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, + -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1; + + v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan, + 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan, + 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan, + 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, nan, + nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256, + 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001, + 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403, + 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999, + 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan, + 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, nan, + nan, 7.864342668429763e-23, 3.015969667594166e-10, 0.0008598571564165444, + nan, nan, 6.031987710123844e-08, 0.5000000000000007, 0.9999999396801229, + nan, nan, 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, + nan, nan, nan, nan, nan, nan, 0.0, 7.029920380986636e-306, + 2.2450728208591345e-101, nan, nan, 0.0, 9.275871147869727e-302, + 1.2232913026152827e-97, nan, nan, 0.0, 3.0891393081932924e-252, + 2.9303043666183996e-60, nan, nan, 2.248913486879199e-196, + 0.5000000000004947, 0.9999999999999999, nan; + + for (int i = 0; i < 125; ++i) { + in_x(i) = x(i); + in_a(i) = a(i); + 
in_b(i) = b(i); + expected_out(i) = v(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_in_a; + Scalar* d_in_b; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_a), bytes); + cudaMalloc((void**)(&d_in_b), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_a, in_a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_b, in_b.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap > gpu_in_x(d_in_x, 125); + Eigen::TensorMap > gpu_in_a(d_in_a, 125); + Eigen::TensorMap > gpu_in_b(d_in_b, 125); + Eigen::TensorMap > gpu_out(d_out, 125); + + gpu_out.device(gpu_device) = betainc(gpu_in_a, gpu_in_b, gpu_in_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 1; i < 125; ++i) { + if ((std::isnan)(expected_out(i))) { + VERIFY((std::isnan)(out(i))); + } else { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + } + + cudaFree(d_in_x); + cudaFree(d_in_a); + cudaFree(d_in_b); + cudaFree(d_out); +} + + void test_cxx11_tensor_cuda() { CALL_SUBTEST_1(test_cuda_elementwise_small()); @@ -1086,5 +1233,8 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_5(test_cuda_igamma()); CALL_SUBTEST_5(test_cuda_igammac()); + + CALL_SUBTEST_6(test_cuda_betainc()); + CALL_SUBTEST_6(test_cuda_betainc()); #endif } From d6d39c7ddb127d91ebfa4ea62e93ea51036f1760 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 7 Jun 2016 14:35:08 -0700 Subject: [PATCH 06/85] Added missing EIGEN_DEVICE_FUNC --- unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index 52cfc2824..d34f1e328 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -148,7 +148,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast(m_impl.data()); } - const TensorEvaluator& impl() const { return m_impl; } + EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } protected: TensorEvaluator m_impl; From 8fd57a97f203edac3f7e8681eafe752294386a24 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 7 Jun 2016 18:22:18 -0700 Subject: [PATCH 07/85] Enable the vectorization of adds and mults of fp16 --- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 51386506f..959dff886 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -28,6 +28,8 @@ template<> struct packet_traits : default_packet_traits AlignedOnScalar = 1, size=2, HasHalfPacket = 0, + HasAdd = 1, + HasMul = 1, HasDiv = 1, HasSqrt = 1, HasRsqrt = 1, From 9dd9d58273362d643eaa0b8f4f16f8aa3d5ef6cd Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Wed, 8 Jun 2016 15:36:42 +0200 Subject: [PATCH 08/85] Copied a regression test from 3.2 branch. 
--- test/geo_homogeneous.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/geo_homogeneous.cpp b/test/geo_homogeneous.cpp index bf63c69ec..305794cdf 100644 --- a/test/geo_homogeneous.cpp +++ b/test/geo_homogeneous.cpp @@ -58,6 +58,8 @@ template void homogeneous(void) T2MatrixType t2 = T2MatrixType::Random(); VERIFY_IS_APPROX(t2 * (v0.homogeneous().eval()), t2 * v0.homogeneous()); VERIFY_IS_APPROX(t2 * (m0.colwise().homogeneous().eval()), t2 * m0.colwise().homogeneous()); + VERIFY_IS_APPROX(t2 * (v0.homogeneous().asDiagonal()), t2 * hv0.asDiagonal()); + VERIFY_IS_APPROX((v0.homogeneous().asDiagonal()) * t2, hv0.asDiagonal() * t2); VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2, v0.transpose().rowwise().homogeneous() * t2); From 9fc8379328dad5fb74249003f56fb608c304ae4d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 8 Jun 2016 16:39:11 +0200 Subject: [PATCH 09/85] Fix extraction of complex eigenvalue pairs in real generalized eigenvalue problems. --- .../src/Eigenvalues/GeneralizedEigenSolver.h | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index a9d6790d5..07a9ccf46 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -327,13 +327,33 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } else { - Scalar p = Scalar(0.5) * (m_matS.coeff(i, i) - m_matS.coeff(i+1, i+1)); - Scalar z = sqrt(abs(p * p + m_matS.coeff(i+1, i) * m_matS.coeff(i, i+1))); - m_alphas.coeffRef(i) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, z); - m_alphas.coeffRef(i+1) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, -z); + // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T + // From the eigen decomposition of T = U * E * U^-1, + // we can extract the eigenvalues of (U^-1 * S * U) / E + // Here, we can take advantage that E = diag(T), and U = [ 1 T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)]. 
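+          // (Indeed T * U = U * diag(T_00, T_11): the columns of U are the eigenvectors of T associated with T_00 and T_11.)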
+ // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00): + + // T = [a b ; 0 c] + // S = [e f ; g h] + RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1); + RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); + RealScalar d = c-a; + RealScalar gb = g*b; + Matrix A; + A << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, + g*c , (gb+h*d)*a; + + // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, + // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): + + Scalar p = Scalar(0.5) * (A.coeff(i, i) - A.coeff(i+1, i+1)); + Scalar z = sqrt(abs(p * p + A.coeff(i+1, i) * A.coeff(i, i+1))); + m_alphas.coeffRef(i) = ComplexScalar(A.coeff(i+1, i+1) + p, z); + m_alphas.coeffRef(i+1) = ComplexScalar(A.coeff(i+1, i+1) + p, -z); + + m_betas.coeffRef(i) = + m_betas.coeffRef(i+1) = a*c*d; - m_betas.coeffRef(i) = m_realQZ.matrixT().coeff(i,i); - m_betas.coeffRef(i+1) = m_realQZ.matrixT().coeff(i,i); i += 2; } } From df095cab104550a8179c28e93d477406dfab6849 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 8 Jun 2016 18:31:19 +0200 Subject: [PATCH 10/85] Fixes for PARDISO: warnings, and defaults to metis+ in-core mode. --- Eigen/src/PardisoSupport/PardisoSupport.h | 35 ++++++++++++++--------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/Eigen/src/PardisoSupport/PardisoSupport.h b/Eigen/src/PardisoSupport/PardisoSupport.h index 80d914f25..091c3970e 100644 --- a/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/Eigen/src/PardisoSupport/PardisoSupport.h @@ -183,7 +183,7 @@ class PardisoImpl : public SparseSolverBase { if(m_isInitialized) // Factorization ran at least once { - internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0, + internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, internal::convert_index(m_size),0, 0, 0, m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); m_isInitialized = false; } @@ -194,11 +194,11 @@ class PardisoImpl : public SparseSolverBase m_type = type; bool symmetric = std::abs(m_type) < 10; m_iparm[0] = 1; // No solver default - m_iparm[1] = 3; // use Metis for the ordering - m_iparm[2] = 1; // Numbers of processors, value of OMP_NUM_THREADS + m_iparm[1] = 2; // use Metis for the ordering + m_iparm[2] = 0; // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??) m_iparm[3] = 0; // No iterative-direct algorithm m_iparm[4] = 0; // No user fill-in reducing permutation - m_iparm[5] = 0; // Write solution into x + m_iparm[5] = 0; // Write solution into x, b is left unchanged m_iparm[6] = 0; // Not in use m_iparm[7] = 2; // Max numbers of iterative refinement steps m_iparm[8] = 0; // Not in use @@ -219,7 +219,8 @@ class PardisoImpl : public SparseSolverBase m_iparm[26] = 0; // No matrix checker m_iparm[27] = (sizeof(RealScalar) == 4) ? 
1 : 0; m_iparm[34] = 1; // C indexing - m_iparm[59] = 1; // Automatic switch between In-Core and Out-of-Core modes + m_iparm[36] = 0; // CSR + m_iparm[59] = 0; // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core memset(m_pt, 0, sizeof(m_pt)); } @@ -246,7 +247,7 @@ class PardisoImpl : public SparseSolverBase mutable SparseMatrixType m_matrix; mutable ComputationInfo m_info; bool m_analysisIsOk, m_factorizationIsOk; - Index m_type, m_msglvl; + StorageIndex m_type, m_msglvl; mutable void *m_pt[64]; mutable ParameterType m_iparm; mutable IntColVectorType m_perm; @@ -265,10 +266,9 @@ Derived& PardisoImpl::compute(const MatrixType& a) derived().getMatrix(a); Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); - manageErrorCode(error); m_analysisIsOk = true; m_factorizationIsOk = true; @@ -287,7 +287,7 @@ Derived& PardisoImpl::analyzePattern(const MatrixType& a) derived().getMatrix(a); Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); @@ -306,8 +306,8 @@ Derived& PardisoImpl::factorize(const MatrixType& a) derived().getMatrix(a); - Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 22, m_size, + Index error; + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 22, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); @@ -354,9 +354,9 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase } Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, m_size, + error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, internal::convert_index(m_size), m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), - m_perm.data(), nrhs, m_iparm.data(), m_msglvl, + m_perm.data(), internal::convert_index(nrhs), m_iparm.data(), m_msglvl, rhs_ptr, x.derived().data()); manageErrorCode(error); @@ -371,6 +371,9 @@ void PardisoImpl::_solve_impl(const MatrixBase &b, MatrixBase * using the Intel MKL PARDISO library. The sparse matrix A must be squared and invertible. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * * \implsparsesolverconcept @@ -421,6 +424,9 @@ class PardisoLU : public PardisoImpl< PardisoLU > * using the Intel MKL PARDISO library. The sparse matrix A must be selfajoint and positive definite. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam UpLo can be any bitwise combination of Upper, Lower. 
The default is Upper, meaning only the upper triangular part has to be used. * Upper|Lower can be used to tell both triangular parts can be used as input. @@ -480,6 +486,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT > * For complex matrices, A can also be symmetric only, see the \a Options template parameter. * The vectors or matrices X and B can be either dense or sparse. * + * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: + * \code solver.pardisoParameterArray()[59] = 1; \endcode + * * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> * \tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. The default is Upper, meaning only the upper triangular part has to be used. * Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix. From 0beabb4776b887c25977186b5af6811eb49aaa23 Mon Sep 17 00:00:00 2001 From: Abhijit Kundu Date: Wed, 8 Jun 2016 16:12:04 -0400 Subject: [PATCH 11/85] Fixed type conversion from int --- Eigen/src/Geometry/Transform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 1a06c1e35..073f4dcd1 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -1367,7 +1367,7 @@ struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES); Matrix rhs; - rhs.template head() = other; rhs[Dim] = 1; + rhs.template head() = other; rhs[Dim] = typename ResultType::Scalar(1); Matrix res(T.matrix() * rhs); return res.template head(); } From a20d2ec1c02d2629cbfa8871898b64cd22445595 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 16:16:22 +0200 Subject: [PATCH 12/85] Fix shadow variable, and indexing. 
--- Eigen/src/Eigenvalues/GeneralizedEigenSolver.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 07a9ccf46..08caed281 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -339,17 +339,17 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); RealScalar d = c-a; RealScalar gb = g*b; - Matrix A; - A << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, - g*c , (gb+h*d)*a; + Matrix S2; + S2 << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, + g*c , (gb+h*d)*a; // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): - Scalar p = Scalar(0.5) * (A.coeff(i, i) - A.coeff(i+1, i+1)); - Scalar z = sqrt(abs(p * p + A.coeff(i+1, i) * A.coeff(i, i+1))); - m_alphas.coeffRef(i) = ComplexScalar(A.coeff(i+1, i+1) + p, z); - m_alphas.coeffRef(i+1) = ComplexScalar(A.coeff(i+1, i+1) + p, -z); + Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1)); + Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1))); + m_alphas.coeffRef(i) = ComplexScalar(S2.coeff(1,1) + p, z); + m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z); m_betas.coeffRef(i) = m_betas.coeffRef(i+1) = a*c*d; From 15890c304edbccedc8a989468ed3fc475f428059 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 16:17:27 +0200 Subject: [PATCH 13/85] Add unit test for non symmetric generalized eigenvalues --- test/eigensolver_generalized_real.cpp | 32 ++++++++++++++++++++------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/test/eigensolver_generalized_real.cpp b/test/eigensolver_generalized_real.cpp index a46a2e50e..da14482de 100644 --- a/test/eigensolver_generalized_real.cpp +++ b/test/eigensolver_generalized_real.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2012 Gael Guennebaud +// Copyright (C) 2012-2016 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -10,6 +10,7 @@ #include "main.h" #include #include +#include template void generalized_eigensolver_real(const MatrixType& m) { @@ -21,6 +22,7 @@ template void generalized_eigensolver_real(const MatrixType Index cols = m.cols(); typedef typename MatrixType::Scalar Scalar; + typedef std::complex ComplexScalar; typedef Matrix VectorType; MatrixType a = MatrixType::Random(rows,cols); @@ -31,14 +33,28 @@ template void generalized_eigensolver_real(const MatrixType MatrixType spdB = b.adjoint() * b + b1.adjoint() * b1; // lets compare to GeneralizedSelfAdjointEigenSolver - GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); - GeneralizedEigenSolver eig(spdA, spdB); + { + GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); + GeneralizedEigenSolver eig(spdA, spdB); - VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); + VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); - VectorType realEigenvalues = eig.eigenvalues().real(); - std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); - VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + VectorType realEigenvalues = eig.eigenvalues().real(); + std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); + VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + } + + // non symmetric case: + { + GeneralizedEigenSolver eig(a,b); + for(Index k=0; k tmp = (eig.betas()(k)*a).template cast() - eig.alphas()(k)*b; + if(tmp.norm()>(std::numeric_limits::min)()) + tmp /= tmp.norm(); + VERIFY_IS_MUCH_SMALLER_THAN( std::abs(tmp.determinant()), Scalar(1) ); + } + } // regression test for bug 1098 { @@ -57,7 +73,7 @@ void test_eigensolver_generalized_real() s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) ); - // some trivial but implementation-wise tricky cases + // some trivial but implementation-wise special cases CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) ); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) ); CALL_SUBTEST_3( generalized_eigensolver_real(Matrix()) ); From c1f9ca925405c8fad126f327b4bdca7f983fc96e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:11:03 +0200 Subject: [PATCH 14/85] Update RealQZ to reduce 2x2 diagonal block of T corresponding to non reduced diagonal block of S to positive diagonal form. This step involve a real 2x2 SVD problem. The respective routine is thus in src/misc/ to be shared by both EVD and AVD modules. 
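Roughly, the new step works as follows (see the RealQZ changes below): for each 2x2 diagonal block of T facing an unreduced 2x2 block of S, real_2x2_jacobi_svd() produces a pair of Jacobi rotations J_l, J_r that make that block of T diagonal; the same rotations are applied to S, and the inverse rotations are absorbed into Q and Z, so that

  A = Q S Z = (Q J_l^-1) (J_l S J_r) (J_r^-1 Z)   and   B = Q T Z = (Q J_l^-1) (J_l T J_r) (J_r^-1 Z)

remain valid with the transformed factors.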
--- Eigen/Eigenvalues | 1 + Eigen/SVD | 1 + Eigen/src/Eigenvalues/RealQZ.h | 32 +++++++++++++++++++- Eigen/src/SVD/JacobiSVD.h | 32 -------------------- Eigen/src/misc/RealSvd2x2.h | 54 ++++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 33 deletions(-) create mode 100644 Eigen/src/misc/RealSvd2x2.h diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index ea93eb303..216a6d51d 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -32,6 +32,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/Eigenvalues/Tridiagonalization.h" #include "src/Eigenvalues/RealSchur.h" #include "src/Eigenvalues/EigenSolver.h" diff --git a/Eigen/SVD b/Eigen/SVD index b353f3f54..565d9c90d 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -31,6 +31,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index a62071d42..c4715b954 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -552,7 +552,6 @@ namespace Eigen { m_T.coeffRef(l,l-1) = Scalar(0.0); } - template RealQZ& RealQZ::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ) { @@ -616,6 +615,37 @@ namespace Eigen { } // check if we converged before reaching iterations limit m_info = (local_iter j_left, j_right; + internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right); + + // Apply resulting Jacobi rotations + m_T.applyOnTheLeft(i,i+1,j_left); + m_T.applyOnTheRight(i,i+1,j_right); + m_S.applyOnTheLeft(i,i+1,j_left); + m_S.applyOnTheRight(i,i+1,j_right); + m_T(i,i+1) = Scalar(0); + + if(m_computeQZ) { + m_Q.applyOnTheRight(i,i+1,j_left.transpose()); + m_Z.applyOnTheLeft(i,i+1,j_right.transpose()); + } + + i++; + } + } + } + return *this; } // end compute diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 1940c8294..b83fd7a4d 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h @@ -419,38 +419,6 @@ struct svd_precondition_2x2_block_to_be_real } }; -template -void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, - JacobiRotation *j_left, - JacobiRotation *j_right) -{ - using std::sqrt; - using std::abs; - Matrix m; - m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), - numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); - JacobiRotation rot1; - RealScalar t = m.coeff(0,0) + m.coeff(1,1); - RealScalar d = m.coeff(1,0) - m.coeff(0,1); - if(d == RealScalar(0)) - { - rot1.s() = RealScalar(0); - rot1.c() = RealScalar(1); - } - else - { - // If d!=0, then t/d cannot overflow because the magnitude of the - // entries forming d are not too small compared to the ones forming t. - RealScalar u = t / d; - RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); - rot1.s() = RealScalar(1) / tmp; - rot1.c() = u / tmp; - } - m.applyOnTheLeft(0,1,rot1); - j_right->makeJacobi(m,0,1); - *j_left = rot1 * j_right->transpose(); -} - template struct traits > { diff --git a/Eigen/src/misc/RealSvd2x2.h b/Eigen/src/misc/RealSvd2x2.h new file mode 100644 index 000000000..cdd7777d2 --- /dev/null +++ b/Eigen/src/misc/RealSvd2x2.h @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Benoit Jacob +// Copyright (C) 2013-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REALSVD2X2_H +#define EIGEN_REALSVD2X2_H + +namespace Eigen { + +namespace internal { + +template +void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, + JacobiRotation *j_left, + JacobiRotation *j_right) +{ + using std::sqrt; + using std::abs; + Matrix m; + m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), + numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); + JacobiRotation rot1; + RealScalar t = m.coeff(0,0) + m.coeff(1,1); + RealScalar d = m.coeff(1,0) - m.coeff(0,1); + if(d == RealScalar(0)) + { + rot1.s() = RealScalar(0); + rot1.c() = RealScalar(1); + } + else + { + // If d!=0, then t/d cannot overflow because the magnitude of the + // entries forming d are not too small compared to the ones forming t. + RealScalar u = t / d; + RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); + rot1.s() = RealScalar(1) / tmp; + rot1.c() = u / tmp; + } + m.applyOnTheLeft(0,1,rot1); + j_right->makeJacobi(m,0,1); + *j_left = rot1 * j_right->transpose(); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_REALSVD2X2_H \ No newline at end of file From 2bd59b0e0d667dcdcb6b070596a1bf023e3e88f1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:12:03 +0200 Subject: [PATCH 15/85] Take advantage that T is already diagonal in the extraction of generalized complex eigenvalues. --- .../src/Eigenvalues/GeneralizedEigenSolver.h | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 08caed281..9f43fd544 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -327,24 +327,13 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } else { - // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T - // From the eigen decomposition of T = U * E * U^-1, - // we can extract the eigenvalues of (U^-1 * S * U) / E - // Here, we can take advantage that E = diag(T), and U = [ 1 T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)]. 
- // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00): + // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a positive diagonal 2x2 block T + // Then taking beta=T_00*T_11, we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00): - // T = [a b ; 0 c] - // S = [e f ; g h] - RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1); - RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); - RealScalar d = c-a; - RealScalar gb = g*b; - Matrix S2; - S2 << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, - g*c , (gb+h*d)*a; - - // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, - // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): + // T = [a 0] + // [0 b] + RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i+1, i+1); + Matrix S2 = m_matS.template block<2,2>(i,i) * Matrix(b,a).asDiagonal(); Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1)); Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1))); @@ -352,7 +341,7 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z); m_betas.coeffRef(i) = - m_betas.coeffRef(i+1) = a*c*d; + m_betas.coeffRef(i+1) = a*b; i += 2; } From e2b383632699684d06ae180b3ad85cfb0189973a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 17:13:33 +0200 Subject: [PATCH 16/85] Include recent changesets that played with product's kernel --- bench/perf_monitoring/gemm/changesets.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bench/perf_monitoring/gemm/changesets.txt b/bench/perf_monitoring/gemm/changesets.txt index fb3e48e99..d00b4603a 100644 --- a/bench/perf_monitoring/gemm/changesets.txt +++ b/bench/perf_monitoring/gemm/changesets.txt @@ -44,4 +44,8 @@ before-evaluators 7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) 7591:09a8e2186610 # 3.3-alpha1 7650:b0f3c8f43025 # help clang inlining +8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) +8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes +8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path +8985:d935df21a082 # Remove the rotating kernel. 
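(Justification of the eigenvalue extraction in patch 15 above: once the 2x2 block of T has been reduced to diag(a,b), the generalized eigenvalues lambda of that 2x2 pair (S, T) satisfy

  lambda = eig( diag(1/a, 1/b) * S ) = eig( S * diag(b, a) ) / (a*b)

since M*N and N*M share the same eigenvalues. Hence one can take beta = a*b and alpha = the two complex eigenvalues of S2 = S * diag(b, a), exactly as the code does, with no division.)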
From aa33446dace833fbf06632e586c80119b3d8ac11 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 08:22:27 -0700 Subject: [PATCH 17/85] Improved support for vectorization of 16-bit floats --- .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 86 +++++++++++++++++++ .../Eigen/CXX11/src/Tensor/TensorMeta.h | 5 ++ .../CXX11/src/Tensor/TensorReductionCuda.h | 8 +- 3 files changed, 95 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 3dd32e9d1..bf52e490f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -84,6 +84,14 @@ struct functor_traits > { }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + // Standard reduction functors template struct SumReducer { @@ -119,6 +127,15 @@ template struct SumReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::type::HasAdd + }; +}; + + template struct MeanReducer { static const bool PacketAccess = packet_traits::HasAdd && !NumTraits::IsInteger; @@ -162,6 +179,15 @@ template struct MeanReducer DenseIndex packetCount_; }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::type::HasAdd + }; +}; + + template struct MaxReducer { static const bool PacketAccess = packet_traits::HasMax; @@ -195,6 +221,15 @@ template struct MaxReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::type::HasMax + }; +}; + + template struct MinReducer { static const bool PacketAccess = packet_traits::HasMin; @@ -228,6 +263,14 @@ template struct MinReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::type::HasMin + }; +}; + template struct ProdReducer { @@ -263,6 +306,14 @@ template struct ProdReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = PacketType::type::HasMul + }; +}; + struct AndReducer { @@ -280,6 +331,15 @@ struct AndReducer } }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + struct OrReducer { static const bool PacketAccess = false; static const bool IsStateful = false; @@ -295,6 +355,15 @@ struct OrReducer { } }; +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + // Argmin/Argmax reducers template struct ArgMaxTupleReducer { @@ -312,6 +381,15 @@ template struct ArgMaxTupleReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + + template struct ArgMinTupleReducer { static const bool PacketAccess = false; @@ -328,6 +406,14 @@ template struct ArgMinTupleReducer } }; +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + // Random number generation namespace { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index b1645d56f..82a905a65 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -54,6 +54,11 @@ struct PacketType { // For CUDA packet types when using a GpuDevice #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +template <> + struct PacketType { + typedef half2 type; + static 
const int size = 2; + }; template <> struct PacketType { typedef float4 type; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index e82530955..1b4fdd03f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -331,7 +331,7 @@ struct FullReducer { #ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || - (internal::is_same::value && Op::PacketAccess)); + (internal::is_same::value && reducer_traits::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; @@ -346,7 +346,7 @@ struct FullReducer { return; } - FullReductionLauncher::run(self, reducer, device, output, num_coeffs); + FullReductionLauncher::PacketAccess>::run(self, reducer, device, output, num_coeffs); } }; @@ -608,7 +608,7 @@ struct InnerReducer { #ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || - (internal::is_same::value && Op::PacketAccess)); + (internal::is_same::value && reducer_traits::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; @@ -627,7 +627,7 @@ struct InnerReducer { return true; } - return InnerReductionLauncher::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); + return InnerReductionLauncher::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals); } }; From 8f92c26319a6e06cce6d0ba2a252521c8096a2c0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 08:23:42 -0700 Subject: [PATCH 18/85] Improved code formatting --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 61df8032d..0d084141d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -122,7 +122,7 @@ struct TensorEvaluator, Device> { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { - return m_dimensions; + return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { From 14a112ee153f7d8554c3a8848e8a0461c7b82f13 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 08:25:22 -0700 Subject: [PATCH 19/85] Use signed integers more consistently to encode the number of threads to use to evaluate a tensor expression. 
--- .../CXX11/src/Tensor/TensorContractionThreadPool.h | 12 ++++++------ .../Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index a60a17049..ee16cde9b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -202,7 +202,7 @@ struct TensorEvaluator::numThreads( + int num_threads = TensorCostModel::numThreads( static_cast(n) * m, cost, this->m_device.numThreads()); // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost @@ -301,7 +301,7 @@ struct TensorEvaluator f) const { typedef TensorCostModel CostModel; if (n <= 1 || numThreads() == 1 || - CostModel::numThreads(n, cost, numThreads()) == 1) { + CostModel::numThreads(n, cost, static_cast(numThreads())) == 1) { f(0, n); return; } @@ -242,7 +242,7 @@ struct ThreadPoolDevice { // Recursively divide size into halves until we reach block_size. // Division code rounds mid to block_size, so we are guaranteed to get // block_count leaves that do actual computations. - Barrier barrier(block_count); + Barrier barrier(static_cast(block_count)); std::function handleRange; handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) { if (last - first <= block_size) { @@ -268,7 +268,7 @@ struct ThreadPoolDevice { private: ThreadPoolInterface* pool_; - size_t num_threads_; + int num_threads_; }; From 66796e843df723eeac04d6dc725f6a8b27a574ba Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 08:50:01 -0700 Subject: [PATCH 20/85] Fixed definition of some of the reducer_traits --- unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index bf52e490f..e6ff70460 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -131,7 +131,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = PacketType::type::HasAdd + PacketAccess = packet_traits::type>::HasAdd }; }; @@ -183,7 +183,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = PacketType::type::HasAdd + PacketAccess = packet_traits::type>::HasAdd }; }; @@ -225,7 +225,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = PacketType::type::HasMax + PacketAccess = packet_traits::type>::HasMax }; }; @@ -267,7 +267,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = PacketType::type::HasMin + PacketAccess = packet_traits::type>::HasMin }; }; @@ -310,7 +310,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::MulCost, - PacketAccess = PacketType::type::HasMul + PacketAccess = packet_traits::type>::HasMul }; }; From 37638dafd71e39407d22d4269b32d1c73b84feb8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 9 Jun 2016 10:29:52 -0700 Subject: [PATCH 21/85] Simplified the code that dispatches vectorized reductions on GPU --- .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 10 ++--- .../Eigen/CXX11/src/Tensor/TensorMeta.h | 38 ++++++++++++------- .../CXX11/src/Tensor/TensorReductionCuda.h | 2 +- 3 files changed, 31 insertions(+), 
19 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index e6ff70460..a8e48fced 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -131,7 +131,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasAdd + PacketAccess = PacketType::HasAdd }; }; @@ -183,7 +183,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasAdd + PacketAccess = PacketType::HasAdd }; }; @@ -225,7 +225,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasMax + PacketAccess = PacketType::HasMax }; }; @@ -267,7 +267,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::AddCost, - PacketAccess = packet_traits::type>::HasMin + PacketAccess = PacketType::HasMin }; }; @@ -310,7 +310,7 @@ template struct reducer_traits, Device> { enum { Cost = NumTraits::MulCost, - PacketAccess = packet_traits::type>::HasMul + PacketAccess = PacketType::HasMul }; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 82a905a65..0f3778e6e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -47,27 +47,39 @@ template <> struct max_n_1<0> { // Default packet types template -struct PacketType { +struct PacketType : internal::packet_traits { typedef typename internal::packet_traits::type type; - enum { size = internal::unpacket_traits::size }; }; // For CUDA packet types when using a GpuDevice #if defined(EIGEN_USE_GPU) && defined(__CUDACC__) template <> - struct PacketType { +struct PacketType { typedef half2 type; static const int size = 2; - }; -template <> -struct PacketType { - typedef float4 type; - static const int size = 4; -}; -template <> -struct PacketType { - typedef double2 type; - static const int size = 2; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0, + + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasLog = 1, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 1, + }; }; #endif diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 1b4fdd03f..d9bbcd858 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -328,7 +328,7 @@ struct FullReducer { // Unfortunately nvidia doesn't support well exotic types such as complex, // so reduce the scope of the optimized version of the code to the simple case // of floats and half floats. 
- #ifdef EIGEN_HAS_CUDA_FP16 +#ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || (internal::is_same::value && reducer_traits::PacketAccess)); From 1fc2746417c8b4bf1645c703b1f99b1871c8d16e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 22:52:37 +0200 Subject: [PATCH 22/85] Make Arrays's ctor/assignment noexcept --- Eigen/src/Core/Array.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index c0af4aa9d..7c2e0de16 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -149,7 +149,7 @@ class Array #if EIGEN_HAS_RVALUE_REFERENCES EIGEN_DEVICE_FUNC - Array(Array&& other) + Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) : Base(std::move(other)) { Base::_check_template_params(); @@ -157,7 +157,7 @@ class Array Base::_set_noalias(other); } EIGEN_DEVICE_FUNC - Array& operator=(Array&& other) + Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { other.swap(*this); return *this; From bd212438217dc3e169a35052f78e2e41a7ce3a3d Mon Sep 17 00:00:00 2001 From: Sean Templeton Date: Fri, 3 Jun 2016 10:51:35 -0500 Subject: [PATCH 23/85] Fix compile errors initializing packets on ARM DS-5 5.20 The ARM DS-5 5.20 compiler fails compiling with the following errors: "src/Core/arch/NEON/PacketMath.h", line 113: Error: #146: too many initializer values Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); ^ "src/Core/arch/NEON/PacketMath.h", line 118: Error: #146: too many initializer values Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); ^ "src/Core/arch/NEON/Complex.h", line 30: Error: #146: too many initializer values static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); ^ "src/Core/arch/NEON/Complex.h", line 31: Error: #146: too many initializer values static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); ^ The vectors are implemented as two doubles, hence the too many initializer values error. Changed the code to use intrinsic load functions which all compilers implementing NEON should have. 
--- Eigen/src/Core/arch/NEON/Complex.h | 5 +++-- Eigen/src/Core/arch/NEON/PacketMath.h | 15 +++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index d2d467936..234f29b80 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -14,8 +14,9 @@ namespace Eigen { namespace internal { -static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); -static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); +const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA ); +static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA ); //---------- float ---------- struct Packet2cf diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index deb2d7e42..fa16bc9c8 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -51,15 +51,12 @@ typedef uint32x4_t Packet4ui; #if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG //Special treatment for Apple's llvm-gcc, its NEON packet types are unions - #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} + #define EIGEN_INIT_NEON_PACKET2D(X, Y) {{X, Y}} #else //Default initializer for packets - #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} + #define EIGEN_INIT_NEON_PACKET2D(X, Y) {X, Y} #endif - // arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function // which available on LLVM and GCC (at least) #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC @@ -122,12 +119,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { - Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + const float32_t f[] = {0, 1, 2, 3}; + Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { - Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); + const int32_t i[] = {0, 1, 2, 3}; + Packet4i countdown = vld1q_s32(i); return vaddq_s32(pset1(a), countdown); } @@ -585,7 +584,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { r template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { - Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1); + Packet2d countdown = EIGEN_INIT_NEON_PACKET2D(0, 1); return vaddq_f64(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); } From c53687dd14443679e92019d34ae0d38f9b570d52 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Sun, 5 Jun 2016 11:10:30 -0700 Subject: [PATCH 24/85] Add randomized properties tests for betainc special function. 
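For reference, the identities exercised by the new tests follow from the definition of the regularized incomplete beta function,

  betainc(a, b, x) = I_x(a, b) = ( integral from 0 to x of t^(a-1) * (1-t)^(b-1) dt ) / B(a, b),

for instance I_x(a, 1) = x^a, I_x(1, b) = 1 - (1-x)^b, and I_x(a, b) = 1 - I_{1-x}(b, a).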
--- test/array.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/array.cpp b/test/array.cpp index 8ed1269c2..4cd4f262b 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -708,6 +708,61 @@ template void array_special_functions() CALL_SUBTEST(res = betainc(a, b, x); verify_component_wise(res, v);); } + + // Test various properties of betainc + { + ArrayType m1 = ArrayType::Random(32); + ArrayType m2 = ArrayType::Random(32); + ArrayType m3 = ArrayType::Random(32); + ArrayType one = ArrayType::Constant(32, Scalar(1.0)); + const Scalar eps = std::numeric_limits::epsilon(); + ArrayType a = (m1 * 4.0).exp(); + ArrayType b = (m2 * 4.0).exp(); + ArrayType x = m3.abs(); + + // betainc(a, 1, x) == x**a + CALL_SUBTEST( + ArrayType test = betainc(a, one, x); + ArrayType expected = x.pow(a); + verify_component_wise(test, expected);); + + // betainc(1, b, x) == 1 - (1 - x)**b + CALL_SUBTEST( + ArrayType test = betainc(one, b, x); + ArrayType expected = one - (one - x).pow(b); + verify_component_wise(test, expected);); + + // betainc(a, b, x) == 1 - betainc(b, a, 1-x) + CALL_SUBTEST( + ArrayType test = betainc(a, b, x) + betainc(b, a, one - x); + ArrayType expected = one; + verify_component_wise(test, expected);); + + // betainc(a+1, b, x) = betainc(a, b, x) - x**a * (1 - x)**b / (a * beta(a, b)) + CALL_SUBTEST( + ArrayType num = x.pow(a) * (one - x).pow(b); + ArrayType denom = a * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp(); + // Add eps to rhs and lhs so that component-wise test doesn't result in + // nans when both outputs are zeros. + ArrayType expected = betainc(a, b, x) - num / denom + eps; + ArrayType test = betainc(a + one, b, x) + eps; + if (sizeof(Scalar) >= 8) { // double + verify_component_wise(test, expected); + } else { + // Reason for limited test: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232 + verify_component_wise(test.head(8), expected.head(8)); + }); + + // betainc(a, b+1, x) = betainc(a, b, x) + x**a * (1 - x)**b / (b * beta(a, b)) + CALL_SUBTEST( + // Add eps to rhs and lhs so that component-wise test doesn't result in + // nans when both outputs are zeros. + ArrayType num = x.pow(a) * (one - x).pow(b); + ArrayType denom = b * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp(); + ArrayType expected = betainc(a, b, x) + num / denom + eps; + ArrayType test = betainc(a, b + one, x) + eps; + verify_component_wise(test, expected);); + } #endif } From d7e3e4bb0407718d27cb62c6409dfaf0ea0986d1 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Sun, 5 Jun 2016 10:15:41 +0200 Subject: [PATCH 25/85] Removed executable bits from header files. 
From 5b95b4daf9933063c57c24c2338b043ec257d461 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 5 Jun 2016 12:57:48 -0700 Subject: [PATCH 26/85] Moved static assertions into the class constructor to make the code more portable --- Eigen/src/Core/CwiseTernaryOp.h | 41 +++++++++++---------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/Eigen/src/Core/CwiseTernaryOp.h b/Eigen/src/Core/CwiseTernaryOp.h index fe71c07cf..9f3576fec 100644 --- a/Eigen/src/Core/CwiseTernaryOp.h +++ b/Eigen/src/Core/CwiseTernaryOp.h @@ -34,22 +34,6 @@ struct traits > { typedef typename result_of::type Scalar; - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageIndex, - typename internal::traits::StorageIndex>::value), - STORAGE_INDEX_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageIndex, - typename internal::traits::StorageIndex>::value), - STORAGE_INDEX_MUST_MATCH) typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::StorageIndex StorageIndex; @@ -100,18 +84,8 @@ template ::StorageKind>, - internal::no_assignment_operator { - EIGEN_STATIC_ASSERT( - (internal::is_same< - typename internal::traits::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same< - typename internal::traits::StorageKind, - typename internal::traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - + internal::no_assignment_operator +{ public: typedef typename internal::remove_all::type Arg1; typedef typename internal::remove_all::type Arg2; @@ -137,6 +111,17 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl< // require the sizes to match EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + + // The index types should match + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && a1.rows() == a3.rows() && a1.cols() == a3.cols()); } From 1f1e0b9e30a175c7a3197ffc87898404dda7c45e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Sun, 5 Jun 2016 12:59:11 -0700 Subject: [PATCH 27/85] Silenced compilation warning --- Eigen/src/Core/SpecialFunctions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index 0dd9a1dc3..a657cb854 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h @@ -1050,8 +1050,8 @@ struct betainc_impl { template struct betainc_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) { - /* betaincf.c + static EIGEN_STRONG_INLINE Scalar run(Scalar, Scalar, Scalar) { + /* betaincf.c * * Incomplete beta integral * From 66e99ab6a1444d8e3d47211e4540837e6b982a3a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 6 Jun 2016 15:11:41 +0200 Subject: [PATCH 28/85] Relax mixing-type constraints for binary coefficient-wise operators: - Replace 
internal::scalar_product_traits by Eigen::ScalarBinaryOpTraits - Remove the "functor_is_product_like" helper (was pretty ugly) - Currently, OP is not used, but it is available to the user for fine grained tuning - Currently, only the following operators have been generalized: *,/,+,-,=,*=,/=,+=,-= - TODO: generalize all other binray operators (comparisons,pow,etc.) - TODO: handle "scalar op array" operators (currently only * is handled) - TODO: move the handling of the "void" scalar type to ScalarBinaryOpTraits --- Eigen/src/Core/ArrayBase.h | 4 +- Eigen/src/Core/AssignEvaluator.h | 12 ++-- Eigen/src/Core/CwiseBinaryOp.h | 4 +- Eigen/src/Core/DiagonalMatrix.h | 6 +- Eigen/src/Core/Dot.h | 12 ++-- Eigen/src/Core/EigenBase.h | 4 +- Eigen/src/Core/MatrixBase.h | 4 +- Eigen/src/Core/NoAlias.h | 6 +- Eigen/src/Core/PlainObjectBase.h | 2 +- Eigen/src/Core/Product.h | 3 +- Eigen/src/Core/ProductEvaluators.h | 55 ++++++++-------- Eigen/src/Core/Redux.h | 4 +- Eigen/src/Core/Ref.h | 2 +- Eigen/src/Core/SelfCwiseBinaryOp.h | 10 +-- Eigen/src/Core/Solve.h | 13 ++-- Eigen/src/Core/TriangularMatrix.h | 20 +++--- Eigen/src/Core/VectorwiseOp.h | 4 +- Eigen/src/Core/functors/AssignmentFunctors.h | 54 ++++++++-------- Eigen/src/Core/functors/BinaryFunctors.h | 64 ++++++++----------- .../Core/products/GeneralBlockPanelKernel.h | 4 +- Eigen/src/Core/products/GeneralMatrixMatrix.h | 4 +- .../products/GeneralMatrixMatrixTriangular.h | 4 +- Eigen/src/Core/products/GeneralMatrixVector.h | 4 +- .../Core/products/TriangularMatrixVector.h | 4 +- Eigen/src/Core/util/ForwardDeclarations.h | 5 +- Eigen/src/Core/util/Macros.h | 4 +- Eigen/src/Core/util/Meta.h | 64 +++++++++++-------- Eigen/src/Core/util/XprHelper.h | 10 +-- Eigen/src/Geometry/AlignedBox.h | 6 +- Eigen/src/Geometry/Homogeneous.h | 12 ++-- Eigen/src/Householder/HouseholderSequence.h | 2 +- .../IterativeLinearSolvers/SolveWithGuess.h | 4 +- Eigen/src/LU/FullPivLU.h | 6 +- Eigen/src/LU/InverseImpl.h | 6 +- Eigen/src/LU/PartialPivLU.h | 6 +- Eigen/src/QR/ColPivHouseholderQR.h | 4 +- .../src/QR/CompleteOrthogonalDecomposition.h | 4 +- Eigen/src/QR/FullPivHouseholderQR.h | 4 +- Eigen/src/SparseCore/SparseAssign.h | 20 +++--- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 20 +++--- Eigen/src/SparseCore/SparseDenseProduct.h | 4 +- Eigen/src/SparseCore/SparseMatrix.h | 4 +- Eigen/src/SparseCore/SparseMatrixBase.h | 2 +- Eigen/src/SparseCore/SparseProduct.h | 12 ++-- Eigen/src/SparseCore/SparseSelfAdjointView.h | 10 +-- Eigen/src/SparseQR/SparseQR.h | 8 +-- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 4 +- Eigen/src/plugins/CommonCwiseBinaryOps.h | 18 +++++- Eigen/src/plugins/CommonCwiseUnaryOps.h | 6 +- Eigen/src/plugins/MatrixCwiseBinaryOps.h | 4 +- blas/PackedTriangularMatrixVector.h | 4 +- test/array.cpp | 2 +- test/array_for_matrix.cpp | 2 +- test/mixingtypes.cpp | 32 +++++++--- test/vectorization_logic.cpp | 7 +- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 56 ++++++++-------- .../KroneckerProduct/KroneckerTensorProduct.h | 4 +- 57 files changed, 345 insertions(+), 314 deletions(-) diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 3d9c37bf6..62851a0c2 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -176,7 +176,7 @@ template EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -189,7 +189,7 @@ template EIGEN_STRONG_INLINE 
Derived & ArrayBase::operator+=(const ArrayBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 4b914ac0c..f966724cc 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -687,7 +687,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) { - call_dense_assignment_loop(dst, src, internal::assign_op()); + call_dense_assignment_loop(dst, src, internal::assign_op()); } /*************************************************************************** @@ -722,13 +722,13 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) { - call_assignment(dst, src, internal::assign_op()); + call_assignment(dst, src, internal::assign_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) { - call_assignment(dst, src, internal::assign_op()); + call_assignment(dst, src, internal::assign_op()); } // Deal with "assume-aliasing" @@ -787,7 +787,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias(Dst& dst, const Src& src) { - call_assignment_no_alias(dst, src, internal::assign_op()); + call_assignment_no_alias(dst, src, internal::assign_op()); } template @@ -809,7 +809,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) { - call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); } // forward declaration @@ -838,7 +838,7 @@ template< typename DstXprType, typename SrcXprType, typename Functor, typename S struct Assignment { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); src.evalTo(dst); diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 39820fd7d..aa3297354 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -160,7 +160,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -173,7 +173,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 5a9e3abd4..aa619dd5c 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -320,16 +320,16 @@ template<> struct AssignmentKind { typedef Diagonal2De template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + 
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.setZero(); dst.diagonal() = src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index f3c869635..1d7f2262e 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -28,22 +28,24 @@ template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.template binaryExpr(b).sum(); } }; template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.transpose().template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.transpose().template binaryExpr(b).sum(); } }; @@ -62,7 +64,7 @@ struct dot_nocheck template template EIGEN_DEVICE_FUNC -typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType +typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index ba8e09674..f76995af9 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -138,7 +138,7 @@ template template Derived& DenseBase::operator+=(const EigenBase &other) { - call_assignment(derived(), other.derived(), internal::add_assign_op()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -146,7 +146,7 @@ template template Derived& DenseBase::operator-=(const EigenBase &other) { - call_assignment(derived(), other.derived(), internal::sub_assign_op()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index b8b7f458f..f63505fef 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -193,7 +193,7 @@ template class MatrixBase template EIGEN_DEVICE_FUNC - typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType + typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType dot(const MatrixBase& other) const; EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; @@ -381,7 +381,7 @@ template class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN /// \internal helper struct to form the return type of the cross product template struct cross_product_return_type { - typedef typename 
internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; typedef Matrix type; }; #endif // EIGEN_PARSED_BY_DOXYGEN diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index ffb673cee..33908010b 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -39,7 +39,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); return m_expression; } @@ -47,7 +47,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); return m_expression; } @@ -55,7 +55,7 @@ class NoAlias EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { - call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); return m_expression; } diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 570dbd53b..64f5eb052 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -718,7 +718,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. - internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); return this->derived(); } diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 8aa1de081..bad289761 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -18,11 +18,12 @@ namespace internal { // Determine the scalar of Product. This is normally the same as Lhs::Scalar times // Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. +// TODO: this could be removed once ScalarBinaryOpTraits handles void. template::Shape, typename RhsShape = typename evaluator_traits::Shape > struct product_result_scalar { - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; }; template diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index cc7166062..7f041e5dd 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -124,12 +124,12 @@ protected: // Dense = Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, +struct Assignment, internal::assign_op::Scalar>, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we handle nested_eval here? 
generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); @@ -138,12 +138,12 @@ struct Assignment, internal::assign_op -struct Assignment, internal::add_assign_op, Dense2Dense, +struct Assignment, internal::add_assign_op::Scalar>, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::addTo(dst, src.lhs(), src.rhs()); @@ -152,12 +152,12 @@ struct Assignment, internal::add_assign_op< // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::sub_assign_op, Dense2Dense, +struct Assignment, internal::sub_assign_op::Scalar>, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::subTo(dst, src.lhs(), src.rhs()); @@ -187,37 +187,38 @@ struct Assignment" as well. template -struct evaluator_assume_aliasing, const OtherXpr, +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, const Product >, DenseShape > { static const bool value = true; }; -template +template struct assignment_from_xpr_plus_product { - typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + typedef CwiseBinaryOp, const OtherXpr, const ProductType> SrcXprType; + template static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const Func1& func) + void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) { - call_assignment_no_alias(dst, src.lhs(), func); + call_assignment_no_alias(dst, src.lhs(), Func1()); call_assignment_no_alias(dst, src.rhs(), Func2()); } }; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::assign_op, internal::add_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::assign_op, internal::add_assign_op > {}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const Product >, internal::add_assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::add_assign_op, internal::add_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::add_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::add_assign_op, internal::add_assign_op > {}; -template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, const OtherXpr, - const 
Product >, internal::sub_assign_op, Dense2Dense> - : assignment_from_xpr_plus_product, Scalar, internal::sub_assign_op, internal::sub_assign_op > +template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> +struct Assignment, const OtherXpr, + const Product >, internal::sub_assign_op, Dense2Dense> + : assignment_from_xpr_plus_product, internal::sub_assign_op, internal::sub_assign_op > {}; //---------------------------------------- @@ -369,21 +370,21 @@ struct generic_product_impl { // Same as: dst.noalias() = lhs.lazyProduct(rhs); // but easier on the compiler side - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); } template static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() += lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); } template static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { // dst.noalias() -= lhs.lazyProduct(rhs); - call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); } // template @@ -735,7 +736,7 @@ template { - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; public: enum { CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 7984cd6e1..ec969d9b9 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -450,7 +450,7 @@ DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return derived().redux(Eigen::internal::scalar_sum_op()); + return derived().redux(Eigen::internal::scalar_sum_op()); } /** \returns the mean of all coefficients of *this @@ -465,7 +465,7 @@ DenseBase::mean() const #pragma warning push #pragma warning ( disable : 2259 ) #endif - return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); #ifdef __INTEL_COMPILER #pragma warning pop #endif diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 6e94181f3..17065fdd5 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -262,7 +262,7 @@ template class Ref< template EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); Base::construct(m_object); } diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h index 78fff1549..719ed72a5 100644 --- a/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -12,11 +12,13 @@ namespace Eigen { +// TODO generalize the scalar type of 'other' + template EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); + internal::call_assignment(this->derived(), 
PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); return derived(); } @@ -24,7 +26,7 @@ template EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); return derived(); } @@ -32,7 +34,7 @@ template EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); return derived(); } @@ -40,7 +42,7 @@ template EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { typedef typename Derived::PlainObject PlainObject; - internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); + internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); return derived(); } diff --git a/Eigen/src/Core/Solve.h b/Eigen/src/Core/Solve.h index ba2ee53b8..038ad5b11 100644 --- a/Eigen/src/Core/Solve.h +++ b/Eigen/src/Core/Solve.h @@ -134,10 +134,10 @@ protected: // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? 
src.dec()._solve_impl(src.rhs(), dst); @@ -146,10 +146,10 @@ struct Assignment, internal::assign_op -struct Assignment,RhsType>, internal::assign_op, Dense2Dense, Scalar> +struct Assignment,RhsType>, internal::assign_op, Dense2Dense, Scalar> { typedef Solve,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } @@ -157,10 +157,11 @@ struct Assignment,RhsType>, internal: // Specialization for "dst = dec.adjoint().solve(rhs)" template -struct Assignment, const Transpose >,RhsType>, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, const Transpose >,RhsType>, + internal::assign_op, Dense2Dense, Scalar> { typedef Solve, const Transpose >,RhsType> SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); } diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 5c5e5028e..8731e9127 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -367,14 +367,14 @@ template class TriangularViewImpl<_Mat template EIGEN_DEVICE_FUNC TriangularViewType& operator+=(const DenseBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); return derived(); } /** \sa MatrixBase::operator-=() */ template EIGEN_DEVICE_FUNC TriangularViewType& operator-=(const DenseBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -552,7 +552,7 @@ template inline TriangularView& TriangularViewImpl::operator=(const MatrixBase& other) { - internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); + internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op()); return derived(); } @@ -804,7 +804,7 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src) { - call_triangular_assignment_loop(dst, src, internal::assign_op()); + call_triangular_assignment_loop(dst, src, internal::assign_op()); } template<> struct AssignmentKind { typedef Triangular2Triangular Kind; }; @@ -933,10 +933,10 @@ namespace internal { // Triangular = Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::assign_op::Scalar>, Dense2Triangular, Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.setZero(); dst._assignProduct(src, 1); @@ -945,10 +945,10 @@ struct Assignment, internal::assign_ // Triangular += Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::add_assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::add_assign_op::Scalar>, Dense2Triangular, 
Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { dst._assignProduct(src, 1); } @@ -956,10 +956,10 @@ struct Assignment, internal::add_ass // Triangular -= Product template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar> -struct Assignment, internal::sub_assign_op, Dense2Triangular, Scalar> +struct Assignment, internal::sub_assign_op::Scalar>, Dense2Triangular, Scalar> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { dst._assignProduct(src, -1); } diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 193891189..00a4a8c39 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -540,7 +540,7 @@ template class VectorwiseOp /** Returns the expression of the sum of the vector \a other to each subvector of \c *this */ template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC - CwiseBinaryOp, + CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> operator+(const DenseBase& other) const @@ -553,7 +553,7 @@ template class VectorwiseOp /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */ template EIGEN_DEVICE_FUNC - CwiseBinaryOp, + CwiseBinaryOp, const ExpressionTypeNestedCleaned, const typename ExtendedType::Type> operator-(const DenseBase& other) const diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h index 51fef50e8..9b373c783 100644 --- a/Eigen/src/Core/functors/AssignmentFunctors.h +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -18,20 +18,24 @@ namespace internal { * \brief Template functor for scalar/packet assignment * */ -template struct assign_op { +template struct assign_op { EIGEN_EMPTY_STRUCT_CTOR(assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,b); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,b); } }; -template -struct functor_traits > { + +// Empty overload for void type (used by PermutationMatrix +template struct assign_op {}; + +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost, - PacketAccess = packet_traits::Vectorizable + Cost = NumTraits::ReadCost, + PacketAccess = is_same::value && packet_traits::Vectorizable && packet_traits::Vectorizable }; }; @@ -39,20 +43,20 @@ struct functor_traits > { * \brief Template functor for scalar/packet assignment with addition * */ -template struct add_assign_op { +template struct add_assign_op { EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) 
const + { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = is_same::value && packet_traits::HasAdd }; }; @@ -60,20 +64,20 @@ struct functor_traits > { * \brief Template functor for scalar/packet assignment with subtraction * */ -template struct sub_assign_op { +template struct sub_assign_op { EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; } template - EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const - { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } + EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const + { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::ReadCost + NumTraits::AddCost, - PacketAccess = packet_traits::HasSub + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = is_same::value && packet_traits::HasSub }; }; @@ -98,7 +102,6 @@ struct functor_traits > { PacketAccess = is_same::value && packet_traits::HasMul }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; /** \internal * \brief Template functor for scalar/packet assignment with diviving @@ -120,7 +123,6 @@ struct functor_traits > { PacketAccess = is_same::value && packet_traits::HasDiv }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; /** \internal * \brief Template functor for scalar/packet assignment with swapping diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 6eb5b91ce..98fcebae5 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -21,22 +21,23 @@ namespace internal { * * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum() */ -template struct scalar_sum_op { -// typedef Scalar result_type; +template struct scalar_sum_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::padd(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, // rough estimate! + PacketAccess = is_same::value && packet_traits::HasAdd && packet_traits::HasAdd + // TODO vectorize mixed sum }; }; @@ -45,7 +46,7 @@ struct functor_traits > { * This is required to solve Bug 426. 
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast() */ -template<> struct scalar_sum_op : scalar_sum_op { +template<> struct scalar_sum_op : scalar_sum_op { EIGEN_DEPRECATED scalar_sum_op() {} }; @@ -57,11 +58,7 @@ template<> struct scalar_sum_op : scalar_sum_op { * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() */ template struct scalar_product_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul - }; - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } template @@ -75,7 +72,8 @@ template struct functor_traits > { enum { Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! - PacketAccess = scalar_product_op::Vectorizable + PacketAccess = is_same::value && packet_traits::HasMul && packet_traits::HasMul + // TODO vectorize mixed product }; }; @@ -90,7 +88,7 @@ template struct scalar_conj_product_op { Conj = NumTraits::IsComplex }; - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const @@ -269,18 +267,19 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::operator- */ -template struct scalar_difference_op { +template struct scalar_difference_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::psub(a,b); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasSub + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = is_same::value && packet_traits::HasSub && packet_traits::HasSub }; }; @@ -290,11 +289,7 @@ struct functor_traits > { * \sa class CwiseBinaryOp, Cwise::operator/() */ template struct scalar_quotient_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv - }; - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template @@ -305,7 +300,7 @@ template struct functor_traits > { typedef typename scalar_quotient_op::result_type result_type; enum { - PacketAccess = scalar_quotient_op::Vectorizable, + PacketAccess = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv, Cost = NumTraits::template Div::Cost }; }; @@ -446,7 +441,7 @@ struct functor_traits > template struct scalar_multiple2_op { - typedef typename scalar_product_traits::ReturnType 
result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } @@ -481,7 +476,7 @@ struct functor_traits > template struct scalar_quotient2_op { - typedef typename scalar_product_traits::ReturnType result_type; + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; } @@ -491,15 +486,6 @@ template struct functor_traits > { enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; -// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication -// where the mixing of different types is handled by scalar_product_traits -// In particular, real * complex is allowed. -// FIXME move this to functor_traits adding a functor_default -template struct functor_is_product_like { enum { ret = 0 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; - /** \internal * \brief Template functor to add a scalar to a fixed other one diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 253c03462..63a9fc462 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -363,7 +363,7 @@ class gebp_traits public: typedef _LhsScalar LhsScalar; typedef _RhsScalar RhsScalar; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, @@ -478,7 +478,7 @@ class gebp_traits, RealScalar, _ConjLhs, false> public: typedef std::complex LhsScalar; typedef RealScalar RhsScalar; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { ConjLhs = _ConjLhs, diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 7528fef24..b1465c3b5 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -25,7 +25,7 @@ struct general_matrix_matrix_product Traits; - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, const LhsScalar* lhs, Index lhsStride, @@ -55,7 +55,7 @@ struct general_matrix_matrix_product Traits; -typedef typename scalar_product_traits::ReturnType ResScalar; +typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static void run(Index rows, Index cols, Index depth, const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, diff --git a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index 
80ba89465..29d6dc721 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -40,7 +40,7 @@ template struct general_matrix_matrix_triangular_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) @@ -57,7 +57,7 @@ template struct general_matrix_matrix_triangular_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha, level3_blocking& blocking) diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index fc8886511..4a5cf3fb6 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -58,7 +58,7 @@ namespace internal { template struct general_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable @@ -334,7 +334,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product struct general_matrix_vector_product { -typedef typename scalar_product_traits::ReturnType ResScalar; +typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { Vectorizable = packet_traits::Vectorizable && packet_traits::Vectorizable diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index f79840aa7..c11a983c7 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -20,7 +20,7 @@ struct triangular_matrix_vector_product; template struct triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -91,7 +91,7 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product struct triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = ((Mode&Lower)==Lower), HasUnitDiag = (Mode & UnitDiag)==UnitDiag, diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 42e2e75b9..045e22658 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -131,6 +131,7 @@ template class ArrayWrapper; template class MatrixWrapper; template class SolverBase; template class InnerIterator; +template struct ScalarBinaryOpTraits; namespace internal { template struct kernel_retval_base; @@ -175,8 +176,8 @@ namespace internal { // with optional conjugation of the arguments. 
template struct conj_helper; -template struct scalar_sum_op; -template struct scalar_difference_op; +template struct scalar_sum_op; +template struct scalar_difference_op; template struct scalar_conj_product_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index c9a0b9893..35547fdda 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -885,9 +885,9 @@ namespace Eigen { } // the expression type of a cwise product -#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \ +#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ CwiseBinaryOp< \ - internal::scalar_product_op< \ + EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \ typename internal::traits::Scalar, \ typename internal::traits::Scalar \ >, \ diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 7ecd59add..af661c313 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -375,33 +375,6 @@ template struct scalar_product_traits enum { Defined = 0 }; }; -template struct scalar_product_traits -{ - enum { - // Cost = NumTraits::MulCost, - Defined = 1 - }; - typedef T ReturnType; -}; - -template struct scalar_product_traits > -{ - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; -}; - -template struct scalar_product_traits, T> -{ - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; -}; - // FIXME quick workaround around current limitation of result_of // template // struct result_of(ArgType0,ArgType1)> { @@ -434,6 +407,43 @@ T div_ceil(const T &a, const T &b) } // end namespace numext + +/** \class ScalarBinaryOpTraits + * \ingroup Core_Module + * + * \brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is. + * + * \sa CwiseBinaryOp + */ +template +struct ScalarBinaryOpTraits +#ifndef EIGEN_PARSED_BY_DOXYGEN + // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class. + : internal::scalar_product_traits +#endif // EIGEN_PARSED_BY_DOXYGEN +{}; + +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +template +struct ScalarBinaryOpTraits,BinaryOp> +{ + enum { Defined = 1 }; + typedef std::complex ReturnType; +}; + +template +struct ScalarBinaryOpTraits, T,BinaryOp> +{ + enum { Defined = 1 }; + typedef std::complex ReturnType; +}; + } // end namespace Eigen #endif // EIGEN_META_H diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 3605de6fd..4fd4a9b0d 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -649,17 +649,13 @@ std::string demangle_flags(int f) } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. +// We require Lhs and Rhs to have "compatible" scalar types. // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. 
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to // add together a float matrix and a double matrix. +// Treat "void" as a special case. Needed for permutation products. TODO: this should be handled by ScalarBinaryOpTraits #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ - ? int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same_or_void::value)), \ + EIGEN_STATIC_ASSERT(int(internal::is_same_or_void::value) || int(ScalarBinaryOpTraits::Defined), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) } // end namespace Eigen diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index 03f1a11f8..aeb043a6c 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -112,7 +112,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** \returns the center of the box */ inline const CwiseUnaryOp, - const CwiseBinaryOp, const VectorType, const VectorType> > + const CwiseBinaryOp, const VectorType, const VectorType> > center() const { return (m_min+m_max)/2; } @@ -120,7 +120,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * Note that this function does not get the same * result for integral or floating scalar types: see */ - inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const + inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const { return m_max - m_min; } /** \returns the volume of the bounding box */ @@ -131,7 +131,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * if the length of the diagonal is needed: diagonal().norm() * will provide it. 
*/ - inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const + inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const { return sizes(); } /** \returns the vertex of the bounding box at the corner defined by diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index cd52b5470..1c35ca486 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -329,10 +329,10 @@ protected: // dense = homogeneous template< typename DstXprType, typename ArgType, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template topRows(src.nestedExpression().rows()) = src.nestedExpression(); dst.row(dst.rows()-1).setOnes(); @@ -341,10 +341,10 @@ struct Assignment, internal::assign_op // dense = homogeneous template< typename DstXprType, typename ArgType, typename Scalar> -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template leftCols(src.nestedExpression().cols()) = src.nestedExpression(); dst.col(dst.cols()-1).setOnes(); @@ -373,7 +373,7 @@ struct homogeneous_right_product_refactoring_helper typedef typename Rhs::ConstRowXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; - typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; + typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; }; template @@ -414,7 +414,7 @@ struct homogeneous_left_product_refactoring_helper typedef typename Lhs::ConstColXpr ConstantColumn; typedef Replicate ConstantBlock; typedef Product LinearProduct; - typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; + typedef CwiseBinaryOp, const LinearProduct, const ConstantBlock> Xpr; }; template diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h index a57f81764..3ce0a693d 100644 --- a/Eigen/src/Householder/HouseholderSequence.h +++ b/Eigen/src/Householder/HouseholderSequence.h @@ -108,7 +108,7 @@ struct hseq_side_dependent_impl template struct matrix_type_times_scalar_type { - typedef typename scalar_product_traits::ReturnType + typedef typename ScalarBinaryOpTraits::ReturnType ResultScalar; typedef Matrix Type; diff --git a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h index 35923be3d..7d67d3ce2 100644 --- a/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +++ b/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h @@ -91,10 +91,10 @@ protected: // Specialization for "dst = dec.solveWithGuess(rhs)" // NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse specialization must exist somewhere template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> { typedef SolveWithGuess SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const 
internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? dst = src.guess(); diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index c39f8e3d5..2d01b18c6 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -839,12 +839,12 @@ namespace internal { /***** Implementation of inverse() *****************************************************/ -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef FullPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h index e202a55cb..3134632e1 100644 --- a/Eigen/src/LU/InverseImpl.h +++ b/Eigen/src/LU/InverseImpl.h @@ -286,11 +286,11 @@ struct compute_inverse_and_det_with_check namespace internal { // Specialization for "dense = dense_xpr.inverse()" -template -struct Assignment, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment, internal::assign_op, Dense2Dense> { typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we resize dst here? const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index b68916287..ac2902261 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -525,12 +525,12 @@ MatrixType PartialPivLU::reconstructedMatrix() const namespace internal { /***** Implementation of inverse() *****************************************************/ -template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { typedef PartialPivLU LuType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h index 7c559f952..525ee8c18 100644 --- a/Eigen/src/QR/ColPivHouseholderQR.h +++ b/Eigen/src/QR/ColPivHouseholderQR.h @@ -598,11 +598,11 @@ void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType & namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef ColPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 230d0d23c..52bcc2173 100644 --- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ 
b/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -510,11 +510,11 @@ void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl( namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef CompleteOrthogonalDecomposition CodType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows())); } diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h index 32a10f3fe..4f55d52a5 100644 --- a/Eigen/src/QR/FullPivHouseholderQR.h +++ b/Eigen/src/QR/FullPivHouseholderQR.h @@ -560,11 +560,11 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType namespace internal { template -struct Assignment >, internal::assign_op, Dense2Dense, Scalar> +struct Assignment >, internal::assign_op, Dense2Dense, Scalar> { typedef FullPivHouseholderQR QrType; typedef Inverse SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } diff --git a/Eigen/src/SparseCore/SparseAssign.h b/Eigen/src/SparseCore/SparseAssign.h index 4a8dd12e4..b284fa9e4 100644 --- a/Eigen/src/SparseCore/SparseAssign.h +++ b/Eigen/src/SparseCore/SparseAssign.h @@ -34,8 +34,8 @@ template inline Derived& SparseMatrixBase::operator=(const SparseMatrixBase& other) { // by default sparse evaluation do not alias, so we can safely bypass the generic call_assignment routine - internal::Assignment > - ::run(derived(), other.derived(), internal::assign_op()); + internal::Assignment > + ::run(derived(), other.derived(), internal::assign_op()); return derived(); } @@ -127,7 +127,7 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src) template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar> struct Assignment { - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { assign_sparse_to_sparse(dst.derived(), src.derived()); } @@ -141,7 +141,7 @@ struct Assignment { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - if(internal::is_same >::value) + if(internal::is_same >::value) dst.setZero(); internal::evaluator srcEval(src); @@ -156,10 +156,10 @@ struct Assignment // Specialization for "dst = dec.solve(rhs)" // NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error template -struct Assignment, internal::assign_op, Sparse2Sparse, Scalar> +struct Assignment, internal::assign_op, Sparse2Sparse, Scalar> { typedef Solve SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { src.dec()._solve_impl(src.rhs(), dst); } @@ -176,7 +176,7 @@ struct Assignment typedef Array ArrayXI; typedef Array ArrayXS; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { Index size 
= src.diagonal().size(); dst.makeCompressed(); @@ -187,15 +187,15 @@ struct Assignment } template - static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrixBase &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { dst.diagonal() = src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) { dst.diagonal() += src.diagonal(); } - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) { dst.diagonal() -= src.diagonal(); } }; } // end namespace internal diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index d422f3cbe..dd21eb8c5 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -579,7 +579,7 @@ template template Derived& SparseMatrixBase::operator+=(const DiagonalBase& other) { - call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); + call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -587,7 +587,7 @@ template template Derived& SparseMatrixBase::operator-=(const DiagonalBase& other) { - call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); + call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -600,31 +600,31 @@ SparseMatrixBase::cwiseProduct(const MatrixBase &other) c } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> operator+(const MatrixBase &a, const SparseMatrixBase &b) { - return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> operator+(const SparseMatrixBase &a, const MatrixBase &b) { - return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const DenseDerived, const SparseDerived> operator-(const MatrixBase &a, const SparseMatrixBase &b) { - return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const DenseDerived, const SparseDerived>(a.derived(), b.derived()); } template -EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const SparseDerived, const DenseDerived> operator-(const SparseMatrixBase &a, const MatrixBase &b) { - return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); + return CwiseBinaryOp, const SparseDerived, const DenseDerived>(a.derived(), b.derived()); } } // end namespace Eigen diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 
476796dd7..0547db596 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -74,7 +74,7 @@ struct sparse_time_dense_product_impl let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators // template -// struct scalar_product_traits > +// struct ScalarBinaryOpTraits > // { // enum { // Defined = 1 @@ -97,7 +97,7 @@ struct sparse_time_dense_product_impl::ReturnType rhs_j(alpha * rhs.coeff(j,c)); + typename ScalarBinaryOpTraits::ReturnType rhs_j(alpha * rhs.coeff(j,c)); for(LhsInnerIterator it(lhsEval,j); it ;++it) res.coeffRef(it.index(),c) += it.value() * rhs_j; } diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index a78bd57c3..531fea399 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -440,7 +440,7 @@ class SparseMatrix template void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func); - void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } + void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op()); } template void collapseDuplicates(DupFunctor dup_func = DupFunctor()); @@ -979,7 +979,7 @@ template template void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) { - internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); + internal::set_from_triplets >(begin, end, *this, internal::scalar_sum_op()); } /** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied: diff --git a/Eigen/src/SparseCore/SparseMatrixBase.h b/Eigen/src/SparseCore/SparseMatrixBase.h index 24df36884..45f64e7f2 100644 --- a/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/Eigen/src/SparseCore/SparseMatrixBase.h @@ -256,7 +256,7 @@ template class SparseMatrixBase Derived& operator/=(const Scalar& other); template struct CwiseProductDenseReturnType { - typedef CwiseBinaryOp::Scalar, typename internal::traits::Scalar >::ReturnType>, diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index b23003bb1..84e69903b 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -99,10 +99,10 @@ struct generic_product_impl -struct Assignment, internal::assign_op, Sparse2Dense> +struct Assignment, internal::assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { generic_product_impl::evalTo(dst,src.lhs(),src.rhs()); } @@ -110,10 +110,10 @@ struct Assignment, internal::assig // dense += sparse-product (can be sparse*sparse, sparse*perm, etc.) template< typename DstXprType, typename Lhs, typename Rhs> -struct Assignment, internal::add_assign_op, Sparse2Dense> +struct Assignment, internal::add_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { generic_product_impl::addTo(dst,src.lhs(),src.rhs()); } @@ -121,10 +121,10 @@ struct Assignment, internal::add_a // dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.) 
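For context, the Sparse2Dense product-assignment specializations in this hunk (assignment and compound addition above, compound subtraction immediately below) route statements with a dense destination and a sparse-product source to generic_product_impl. A minimal usage sketch of the statements they cover — illustrative only, the variable names are not taken from the patch:

    #include <Eigen/Dense>
    #include <Eigen/Sparse>

    void sparse_product_into_dense() {
      // Assumed setup (illustrative): A and B are sparse operands, D a dense accumulator.
      Eigen::SparseMatrix<double> A(100, 100), B(100, 100);
      Eigen::MatrixXd D = Eigen::MatrixXd::Zero(100, 100);

      D  = A * B;   // dense  = sparse-product  ->  generic_product_impl::evalTo(D, A, B)
      D += A * B;   // dense += sparse-product  ->  generic_product_impl::addTo(D, A, B)
      D -= A * B;   // dense -= sparse-product  ->  generic_product_impl::subTo(D, A, B)
    }

With this series the assign/add/sub functors carried by these specializations are keyed on both the destination scalar and the source product's scalar, rather than a single shared Scalar parameter.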
template< typename DstXprType, typename Lhs, typename Rhs> -struct Assignment, internal::sub_assign_op, Sparse2Dense> +struct Assignment, internal::sub_assign_op::Scalar>, Sparse2Dense> { typedef Product SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { generic_product_impl::subTo(dst,src.lhs(),src.rhs()); } diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index b92bb17e2..4f0c84d88 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -223,13 +223,13 @@ struct Assignment - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { internal::permute_symm_to_fullsymm(src.matrix(), dst); } template - static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DynamicSparseMatrix& dst, const SrcXprType &src, const internal::assign_op &/*func*/) { // TODO directly evaluate into dst; SparseMatrix tmp(dst.rows(),dst.cols()); @@ -586,12 +586,12 @@ class SparseSymmetricPermutationProduct namespace internal { template -struct Assignment, internal::assign_op, Sparse2Sparse> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseSymmetricPermutationProduct SrcXprType; typedef typename DstXprType::StorageIndex DstIndex; template - static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) + static void run(SparseMatrix &dst, const SrcXprType &src, const internal::assign_op &) { // internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); SparseMatrix tmp; @@ -600,7 +600,7 @@ struct Assignment } template - static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) + static void run(SparseSelfAdjointView& dst, const SrcXprType &src, const internal::assign_op &) { internal::permute_symm_to_symm(src.matrix(),dst.matrix(),src.perm().indices().data()); } diff --git a/Eigen/src/SparseQR/SparseQR.h b/Eigen/src/SparseQR/SparseQR.h index acd7f7e10..2d4498b03 100644 --- a/Eigen/src/SparseQR/SparseQR.h +++ b/Eigen/src/SparseQR/SparseQR.h @@ -705,12 +705,12 @@ struct evaluator_traits > }; template< typename DstXprType, typename SparseQRType> -struct Assignment, internal::assign_op, Sparse2Sparse> +struct Assignment, internal::assign_op, Sparse2Sparse> { typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) { typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); idMat.setIdentity(); @@ -721,12 +721,12 @@ struct Assignment, internal: }; template< typename DstXprType, typename SparseQRType> -struct Assignment, internal::assign_op, Sparse2Dense> +struct Assignment, internal::assign_op, Sparse2Dense> { typedef SparseQRMatrixQReturnType SrcXprType; typedef typename DstXprType::Scalar Scalar; typedef typename DstXprType::StorageIndex StorageIndex; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + static void run(DstXprType &dst, const 
SrcXprType &src, const internal::assign_op &/*func*/) { dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows()); } diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index c3f8c2575..c6ea12c90 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -4,10 +4,10 @@ */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived) +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product) operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived()); + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived()); } /** \returns an expression of the coefficient wise quotient of \c *this and \a other diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index a8fa287c9..3c13b7f06 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2016 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -16,7 +16,13 @@ * * \sa class CwiseBinaryOp, operator-=() */ -EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op) +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference) +operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const +{ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference)(derived(), other.derived()); +} /** \returns an expression of the sum of \c *this and \a other * @@ -24,7 +30,13 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op) * * \sa class CwiseBinaryOp, operator+=() */ -EIGEN_MAKE_CWISE_BINARY_OP(operator+,internal::scalar_sum_op) +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum) +operator+(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const +{ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum)(derived(), other.derived()); +} /** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other * diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 67ec601b9..927167aff 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -73,7 +73,7 @@ operator/(const Scalar& scalar) const /** Overloaded for efficiently multipling with compatible scalar types */ template EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, +typename internal::enable_if::Defined, const CwiseUnaryOp, const Derived> >::type operator*(const T& scalar) const { @@ -91,7 +91,7 @@ operator*(const Scalar& scalar, const StorageBaseType& matrix) template EIGEN_DEVICE_FUNC inline friend -typename internal::enable_if::Defined, +typename internal::enable_if::Defined, const CwiseUnaryOp, const Derived> >::type operator*(const T& scalar, const StorageBaseType& matrix) { @@ -104,7 +104,7 @@ operator*(const T& scalar, const StorageBaseType& matrix) template EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, +typename internal::enable_if::Defined, 
const CwiseUnaryOp, const Derived> >::type operator/(const T& scalar) const { diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h index 6dd2e1192..59581e618 100644 --- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h @@ -19,10 +19,10 @@ */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived) +EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product) cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived()); + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived()); } /** \returns an expression of the coefficient-wise == operator of *this and \a other diff --git a/blas/PackedTriangularMatrixVector.h b/blas/PackedTriangularMatrixVector.h index e9886d56f..0039536a8 100644 --- a/blas/PackedTriangularMatrixVector.h +++ b/blas/PackedTriangularMatrixVector.h @@ -18,7 +18,7 @@ struct packed_triangular_matrix_vector_product; template struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -47,7 +47,7 @@ struct packed_triangular_matrix_vector_product struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, diff --git a/test/array.cpp b/test/array.cpp index 4cd4f262b..bd470d5f7 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -72,7 +72,7 @@ template void array(const ArrayType& m) VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.rowwise().sum().sum() - m1.sum()), m1.abs().sum()); if (!internal::isMuchSmallerThan(abs(m1.sum() - (m1+m2).sum()), m1.abs().sum(), test_precision())) VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp index 75e6a778f..97e03be83 100644 --- a/test/array_for_matrix.cpp +++ b/test/array_for_matrix.cpp @@ -45,7 +45,7 @@ template void array_for_matrix(const MatrixType& m) VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).squaredNorm()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index dbcf468ea..66d9b777a 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -42,6 +42,7 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_f mf = Mat_f::Random(size,size); Mat_d md = mf.template cast(); + //Mat_d rd = md; Mat_cf mcf = Mat_cf::Random(size,size); Mat_cd mcd = mcf.template cast >(); Mat_cd rcd = mcd; @@ -56,16 +57,12 @@ template 
void mixingtypes(int size = SizeAtCompileType) mf+mf; - VERIFY_RAISES_ASSERT(mf+md); -#if !EIGEN_HAS_STD_RESULT_OF - // this one does not even compile with C++11 - VERIFY_RAISES_ASSERT(mf+mcf); -#endif + +// VERIFY_RAISES_ASSERT(mf+md); // does not even compile #ifdef EIGEN_DONT_VECTORIZE VERIFY_RAISES_ASSERT(vf=vd); VERIFY_RAISES_ASSERT(vf+=vd); - VERIFY_RAISES_ASSERT(mcd=md); #endif // check scalar products @@ -186,16 +183,35 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_cd((scd * md.template cast().eval() * mcd).template triangularView())); - VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); - VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); + VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() + mcd.array(), md.template cast().eval().array() + mcd.array() ); + VERIFY_IS_APPROX( mcd.array() + md.array(), mcd.array() + md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast().eval().array() - mcd.array() ); + VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast().eval().array() ); // VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd += md, mcd + md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd -= md, mcd - md.template cast().eval() ); rcd = mcd; VERIFY_IS_APPROX( rcd.array() *= md.array(), mcd.array() * md.template cast().eval().array() ); rcd = mcd; VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast().eval().array() ); + + rcd = mcd; + VERIFY_IS_APPROX( rcd += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + + rcd = mcd; + VERIFY_IS_APPROX( rcd += mcd + md*md, mcd + mcd + ((md*md).template cast().eval()) ); } void test_mixingtypes() diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 24a7641ff..b7c2df64b 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -29,7 +29,7 @@ using internal::demangle_unrolling; template bool test_assign(const Dst&, const Src&, int traversal, int unrolling) { - typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; bool res = traits::Traversal==traversal; if(unrolling==InnerUnrolling+CompleteUnrolling) res = res && (int(traits::Unrolling)==InnerUnrolling || int(traits::Unrolling)==CompleteUnrolling); @@ -53,7 +53,7 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling) template bool test_assign(int traversal, int unrolling) { - typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; if(!res) { @@ -73,7 +73,8 @@ bool test_assign(int traversal, int unrolling) template bool test_redux(const Xpr&, int traversal, int unrolling) { - typedef 
internal::redux_traits,internal::redux_evaluator > traits; + typedef typename Xpr::Scalar Scalar; + typedef internal::redux_traits,internal::redux_evaluator > traits; bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; if(!res) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 089042751..ba61288a3 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -501,36 +501,36 @@ struct make_coherent_impl -struct scalar_product_traits,A_Scalar> -{ - enum { Defined = 1 }; - typedef Matrix ReturnType; -}; - -template -struct scalar_product_traits > -{ - enum { Defined = 1 }; - typedef Matrix ReturnType; -}; - -template -struct scalar_product_traits,typename DerType::Scalar> -{ - enum { Defined = 1 }; - typedef AutoDiffScalar ReturnType; -}; - -template -struct scalar_product_traits > -{ - enum { Defined = 1 }; - typedef AutoDiffScalar ReturnType; -}; - } // end namespace internal +template +struct ScalarBinaryOpTraits,A_Scalar> +{ + enum { Defined = 1 }; + typedef Matrix ReturnType; +}; + +template +struct ScalarBinaryOpTraits > +{ + enum { Defined = 1 }; + typedef Matrix ReturnType; +}; + +template +struct ScalarBinaryOpTraits,typename DerType::Scalar> +{ + enum { Defined = 1 }; + typedef AutoDiffScalar ReturnType; +}; + +template +struct ScalarBinaryOpTraits > +{ + enum { Defined = 1 }; + typedef AutoDiffScalar ReturnType; +}; + #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ template \ inline const Eigen::AutoDiffScalar::type>::Scalar>, const typename Eigen::internal::remove_all::type> > \ diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h index bf9727c21..582fa8512 100644 --- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h +++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h @@ -203,7 +203,7 @@ struct traits > { typedef typename remove_all<_Lhs>::type Lhs; typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; typedef typename promote_index_type::type StorageIndex; enum { @@ -222,7 +222,7 @@ struct traits > typedef MatrixXpr XprKind; typedef typename remove_all<_Lhs>::type Lhs; typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; typedef typename cwise_promote_storage_type::StorageKind, typename traits::StorageKind, scalar_product_op >::ret StorageKind; typedef typename promote_index_type::type StorageIndex; From 9137f560f0c84470c7859a6db704bf5f18ae999d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 07:26:48 -0700 Subject: [PATCH 29/85] Moved assertions to the constructor to make the code more portable --- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 14 ++++++++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h | 16 ---------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 4e873011e..a48cb1daa 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -426,6 +426,20 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || 
internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h index 9509f8002..5f2e329f2 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -227,22 +227,6 @@ struct traits::type Scalar; - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageKind, - typename traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::StorageKind, - typename traits::StorageKind>::value), - STORAGE_KIND_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::Index, - typename traits::Index>::value), - STORAGE_INDEX_MUST_MATCH) - EIGEN_STATIC_ASSERT( - (internal::is_same::Index, - typename traits::Index>::value), - STORAGE_INDEX_MUST_MATCH) typedef traits XprTraits; typedef typename traits::StorageKind StorageKind; typedef typename traits::Index Index; From df24f4a01d762b0ae7dee8a1a0c769c96e4da835 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 6 Jun 2016 16:46:46 +0200 Subject: [PATCH 30/85] bug #1201: improve code generation of affine*vec with MSVC --- Eigen/src/Geometry/Transform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 4fc876bcf..1a06c1e35 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -1367,7 +1367,7 @@ struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES); Matrix rhs; - rhs << other,1; + rhs.template head() = other; rhs[Dim] = 1; Matrix res(T.matrix() * rhs); return res.template head(); } From 33f0340188eb309dfa3b16c0758a878c3ea114d9 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 12:06:42 -0700 Subject: [PATCH 31/85] Implement result_of for the new ternary functors --- Eigen/src/Core/util/Meta.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 7ecd59add..bd3a0aa5d 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -328,6 +328,30 @@ struct result_of { enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; typedef typename binary_result_of_select::type type; }; + +template +struct ternary_result_of_select {typedef typename internal::remove_all::type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::result_type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::template result::type type;}; + +template +struct result_of { + template + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template + static has_tr1_result testFunctor(T const *, typename T::template 
result::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; + typedef typename ternary_result_of_select::type type; +}; #endif /** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer. From ea75dba2014ffa58acfcd160b5e59975c453f8da Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 13:32:28 -0700 Subject: [PATCH 32/85] Added missing EIGEN_DEVICE_FUNC qualifiers to the unary array ops --- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 775fa6ee0..9e35dc571 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -248,6 +248,7 @@ tan() const * * \sa tan(), asin(), acos() */ +EIGEN_DEVICE_FUNC inline const AtanReturnType atan() const { @@ -289,6 +290,7 @@ asin() const * * \sa tan(), sinh(), cosh() */ +EIGEN_DEVICE_FUNC inline const TanhReturnType tanh() const { @@ -302,6 +304,7 @@ tanh() const * * \sa sin(), tanh(), cosh() */ +EIGEN_DEVICE_FUNC inline const SinhReturnType sinh() const { @@ -315,6 +318,7 @@ sinh() const * * \sa tan(), sinh(), cosh() */ +EIGEN_DEVICE_FUNC inline const CoshReturnType cosh() const { @@ -332,6 +336,7 @@ cosh() const * * \sa digamma() */ +EIGEN_DEVICE_FUNC inline const LgammaReturnType lgamma() const { @@ -346,6 +351,7 @@ lgamma() const * * \sa Eigen::digamma(), Eigen::polygamma(), lgamma() */ +EIGEN_DEVICE_FUNC inline const DigammaReturnType digamma() const { @@ -364,6 +370,7 @@ digamma() const * * \sa erfc() */ +EIGEN_DEVICE_FUNC inline const ErfReturnType erf() const { @@ -382,6 +389,7 @@ erf() const * * \sa erf() */ +EIGEN_DEVICE_FUNC inline const ErfcReturnType erfc() const { @@ -455,6 +463,7 @@ cube() const * * \sa ceil(), floor() */ +EIGEN_DEVICE_FUNC inline const RoundReturnType round() const { @@ -468,6 +477,7 @@ round() const * * \sa ceil(), round() */ +EIGEN_DEVICE_FUNC inline const FloorReturnType floor() const { @@ -481,6 +491,7 @@ floor() const * * \sa floor(), round() */ +EIGEN_DEVICE_FUNC inline const CeilReturnType ceil() const { @@ -494,6 +505,7 @@ ceil() const * * \sa isfinite(), isinf() */ +EIGEN_DEVICE_FUNC inline const IsNaNReturnType isNaN() const { @@ -507,6 +519,7 @@ isNaN() const * * \sa isnan(), isfinite() */ +EIGEN_DEVICE_FUNC inline const IsInfReturnType isInf() const { @@ -520,6 +533,7 @@ isInf() const * * \sa isnan(), isinf() */ +EIGEN_DEVICE_FUNC inline const IsFiniteReturnType isFinite() const { From 7ef9f47b5874c33d15649a3312d463ecbd290365 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 14:09:46 -0700 Subject: [PATCH 33/85] Misc small improvements to the reduction code. 
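For context on the hunk below: FullReductionKernel synchronizes its blocks through the shared semaphore. The first block stores reducer.initialize() into the output, issues a __threadfence() so the store is visible device-wide, and then sets the semaphore to 2; every other block spins on atomicCAS(semaphore, 2u, 2u) until it reads 2, so no block accumulates into an uninitialized output. On the way out, thread 0 of each block calls atomicInc(semaphore, gridDim.x + 1); atomicInc wraps the counter back to 0 once the previous value reaches gridDim.x + 1, i.e. exactly when the last block retires, which restores the semaphore for the next launch. A condensed sketch of the pattern — simplified, not the actual kernel, and it assumes the first block is elected by an initial atomicCAS from 0 to 1 (that step lies outside this hunk):

    __global__ void full_reduce_sketch(const float* in, int n, float* out, unsigned int* semaphore) {
      if (threadIdx.x == 0) {
        if (gridDim.x == 1) {
          *out = 0.0f;                                        // single block: no handshake needed
        } else {
          unsigned int block = atomicCAS(semaphore, 0u, 1u);  // assumed election step
          if (block == 0) {
            *out = 0.0f;                                      // initialize the accumulator
            __threadfence();                                  // publish the store before releasing the others
            atomicExch(semaphore, 2u);
          } else {
            unsigned int val;
            do { val = atomicCAS(semaphore, 2u, 2u); } while (val < 2u);
          }
        }
      }
      __syncthreads();

      // Each block folds its slice into the output (the real kernel reduces within the block first).
      float acc = 0.0f;
      for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x)
        acc += in[i];
      atomicAdd(out, acc);

      if (gridDim.x > 1 && threadIdx.x == 0)
        atomicInc(semaphore, gridDim.x + 1);                  // the last finisher wraps the counter to 0
    }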
--- .../CXX11/src/Tensor/TensorReductionCuda.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 0d1a098b7..e82530955 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -130,15 +130,17 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num if (block == 0) { // We're the first block to run, initialize the output value atomicExch(output, reducer.initialize()); - unsigned int old = atomicExch(semaphore, 2u); - assert(old == 1u); + __threadfence(); + atomicExch(semaphore, 2u); } else { + // Wait for the first block to initialize the output value. // Use atomicCAS here to ensure that the reads aren't cached - unsigned int val = atomicCAS(semaphore, 2u, 2u); - while (val < 2u) { + unsigned int val; + do { val = atomicCAS(semaphore, 2u, 2u); } + while (val < 2u); } } } @@ -166,12 +168,8 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num } if (gridDim.x > 1 && threadIdx.x == 0) { - unsigned int ticket = atomicInc(semaphore, UINT_MAX); - assert(ticket >= 2u); - if (ticket == gridDim.x + 1) { - // We're the last block, reset the semaphore - *semaphore = 0; - } + // Let the last block reset the semaphore + atomicInc(semaphore, gridDim.x + 1); } } From 3d71d3918e750be81739f24cdc0687648fb7f5c5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 6 Jun 2016 23:10:55 +0200 Subject: [PATCH 34/85] Disable shortcuts for res ?= prod when the scalar types do not match exactly. --- Eigen/src/Core/ProductEvaluators.h | 15 +++++++++------ test/mixingtypes.cpp | 13 +++++++++++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 7f041e5dd..71ae6e54c 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -122,14 +122,17 @@ protected: PlainObject m_result; }; +// The following three shortcuts are enabled only if the scalar types match excatly. +// TODO: we could enable them for different scalar types when the product is not vectorized. + // Dense = Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::assign_op::Scalar>, Dense2Dense, +struct Assignment, internal::assign_op, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); @@ -138,12 +141,12 @@ struct Assignment, internal::assign_op -struct Assignment, internal::add_assign_op::Scalar>, Dense2Dense, +struct Assignment, internal::add_assign_op, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { // FIXME shall we handle nested_eval here? 
generic_product_impl::addTo(dst, src.lhs(), src.rhs()); @@ -152,12 +155,12 @@ struct Assignment, internal::add_assign_op< // Dense -= Product template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> -struct Assignment, internal::sub_assign_op::Scalar>, Dense2Dense, +struct Assignment, internal::sub_assign_op, Dense2Dense, typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type> { typedef Product SrcXprType; static EIGEN_STRONG_INLINE - void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { // FIXME shall we handle nested_eval here? generic_product_impl::subTo(dst, src.lhs(), src.rhs()); diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index 66d9b777a..2af188b25 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -208,10 +208,19 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast().eval().array() ); rcd = mcd; - VERIFY_IS_APPROX( rcd += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + VERIFY_IS_APPROX( rcd.noalias() += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + VERIFY_IS_APPROX( rcd.noalias() = md*md, ((md*md).eval().template cast()) ); rcd = mcd; - VERIFY_IS_APPROX( rcd += mcd + md*md, mcd + mcd + ((md*md).template cast().eval()) ); + VERIFY_IS_APPROX( rcd.noalias() += md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= md*md, mcd - ((md*md).eval().template cast()) ); + + VERIFY_IS_APPROX( rcd.noalias() = mcd + md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += mcd + md*md, mcd + mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= mcd + md*md, - ((md*md).eval().template cast()) ); } void test_mixingtypes() From 2c462f4201365d1ac4872245e81066746f09ac47 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 6 Jun 2016 23:11:38 +0200 Subject: [PATCH 35/85] Clean handling for void type in EIGEN_CHECK_BINARY_COMPATIBILIY --- Eigen/src/Core/util/Meta.h | 24 ++++++++++++++++++++++++ Eigen/src/Core/util/XprHelper.h | 8 +------- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index af661c313..efb9961ce 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -430,6 +430,30 @@ struct ScalarBinaryOpTraits typedef T ReturnType; }; +// For Matrix * Permutation +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +// For Permutation * Matrix +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef T ReturnType; +}; + +// for Permutation*Permutation +template +struct ScalarBinaryOpTraits +{ + enum { Defined = 1 }; + typedef void ReturnType; +}; + template struct ScalarBinaryOpTraits,BinaryOp> { diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 4fd4a9b0d..828813161 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -610,11 +610,6 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if struct is_same_or_void { enum { value = is_same::value }; }; -template struct is_same_or_void { enum { value = 1 }; }; -template struct is_same_or_void { enum { value = 1 }; }; -template<> struct is_same_or_void 
{ enum { value = 1 }; }; - #ifdef EIGEN_DEBUG_ASSIGN std::string demangle_traversal(int t) { @@ -653,9 +648,8 @@ std::string demangle_flags(int f) // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to // add together a float matrix and a double matrix. -// Treat "void" as a special case. Needed for permutation products. TODO: this should be handled by ScalarBinaryOpTraits #define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT(int(internal::is_same_or_void::value) || int(ScalarBinaryOpTraits::Defined), \ + EIGEN_STATIC_ASSERT(int(ScalarBinaryOpTraits::Defined), \ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) } // end namespace Eigen From 84b2060a9e3e57e49bad6208873e52c327ad135b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jun 2016 17:16:19 -0700 Subject: [PATCH 36/85] Fixed compilation error with gcc 4.4 --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 031dbf6f2..61df8032d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -106,7 +106,7 @@ struct TensorEvaluator, Device> { m_output(NULL) { // Accumulating a scalar isn't supported. - EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); eigen_assert(m_axis >= 0 && m_axis < NumDims); // Compute stride of scan axis @@ -136,7 +136,7 @@ struct TensorEvaluator, Device> { return true; } } - + template EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { return internal::ploadt(m_output + index); From db0118342c2480ba0da7e8e4fbdd6aebaac687a4 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Tue, 7 Jun 2016 19:17:18 +0200 Subject: [PATCH 37/85] Fixed compilation of BVH_Example (required for make doc) --- unsupported/doc/examples/BVH_Example.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/unsupported/doc/examples/BVH_Example.cpp b/unsupported/doc/examples/BVH_Example.cpp index 6b6fac075..4315fb440 100644 --- a/unsupported/doc/examples/BVH_Example.cpp +++ b/unsupported/doc/examples/BVH_Example.cpp @@ -6,9 +6,7 @@ using namespace Eigen; typedef AlignedBox Box2d; namespace Eigen { - namespace internal { Box2d bounding_box(const Vector2d &v) { return Box2d(v, v); } //compute the bounding box of a single point - } } struct PointPointMinimizer //how to compute squared distances between points and rectangles From 86aedc9282f85966461544c40403979b861d3e19 Mon Sep 17 00:00:00 2001 From: Igor Babuschkin Date: Tue, 7 Jun 2016 20:06:38 +0100 Subject: [PATCH 38/85] Add small fixes to TensorScanOp --- unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 61df8032d..66fcacd8e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -81,7 +81,7 @@ struct TensorEvaluator, Device> { typedef typename XprType::Index Index; static const int NumDims = internal::array_size::Dimensions>::value; typedef DSizes 
Dimensions; - typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; @@ -152,6 +152,10 @@ struct TensorEvaluator, Device> { return m_output[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { if (m_output != NULL) { m_device.deallocate(m_output); From 002804938087acc829941c7500a5230fa5cf28b0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 9 Jun 2016 23:08:11 +0200 Subject: [PATCH 39/85] bug #1240: Remove any assumption on NEON vector types. --- Eigen/src/Core/arch/NEON/Complex.h | 3 ++- Eigen/src/Core/arch/NEON/PacketMath.h | 11 ++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h index 234f29b80..ccc00e5a6 100644 --- a/Eigen/src/Core/arch/NEON/Complex.h +++ b/Eigen/src/Core/arch/NEON/Complex.h @@ -275,7 +275,8 @@ ptranspose(PacketBlock& kernel) { //---------- double ---------- #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG -static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000); +const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 }; +static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA ); struct Packet1cd { diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index fa16bc9c8..e1247696d 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -49,14 +49,6 @@ typedef uint32x4_t Packet4ui; #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ const Packet4i p4i_##NAME = pset1(X) -#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG - //Special treatment for Apple's llvm-gcc, its NEON packet types are unions - #define EIGEN_INIT_NEON_PACKET2D(X, Y) {{X, Y}} -#else - //Default initializer for packets - #define EIGEN_INIT_NEON_PACKET2D(X, Y) {X, Y} -#endif - // arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function // which available on LLVM and GCC (at least) #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC @@ -584,7 +576,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { r template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { - Packet2d countdown = EIGEN_INIT_NEON_PACKET2D(0, 1); + const double countdown_raw[] = {0.0,1.0}; + const Packet2d countdown = vld1q_f64(countdown_raw); return vaddq_f64(pset1(a), countdown); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); } From 2e238bafb69ab0ee2ab2e682d5ac1a43376f9496 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 10 Jun 2016 15:05:43 +0200 Subject: [PATCH 40/85] Big 279: enable mixing types for comparisons, min, and max. 
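For context on the hunks below: the min/max and comparison functors gain independent LhsScalar/RhsScalar template parameters, and the member functions built on them are generated through the generalized EIGEN_MAKE_CWISE_BINARY_OP / EIGEN_CWISE_BINARY_RETURN_TYPE macros. A rough sketch of what a method generated by EIGEN_MAKE_CWISE_BINARY_OP(min,min) expands to — simplified and for illustration only; the authoritative expansion is the Macros.h hunk further down:

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    const CwiseBinaryOp<internal::scalar_min_op<Scalar, typename internal::traits<OtherDerived>::Scalar>,
                        const Derived, const OtherDerived>
    min(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived>& other) const
    {
      return CwiseBinaryOp<internal::scalar_min_op<Scalar, typename internal::traits<OtherDerived>::Scalar>,
                           const Derived, const OtherDerived>(derived(), other.derived());
    }

The result scalar of such an expression comes from ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType (see the functor's result_type below), so mixing operand types still trips the YOU_MIXED_DIFFERENT_NUMERIC_TYPES static assertion unless a matching ScalarBinaryOpTraits specialization is in scope.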
--- Eigen/src/Core/Redux.h | 4 +- Eigen/src/Core/functors/BinaryFunctors.h | 74 ++++++++++--------- Eigen/src/Core/util/ForwardDeclarations.h | 7 +- Eigen/src/Core/util/Macros.h | 18 ++--- .../src/Eigenvalues/GeneralizedEigenSolver.h | 32 ++++++-- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 20 ++--- Eigen/src/plugins/CommonCwiseBinaryOps.h | 30 ++++---- Eigen/src/plugins/MatrixCwiseBinaryOps.h | 16 ++-- 8 files changed, 113 insertions(+), 88 deletions(-) diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index ec969d9b9..b6e8f8887 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -425,7 +425,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { - return derived().redux(Eigen::internal::scalar_min_op()); + return derived().redux(Eigen::internal::scalar_min_op()); } /** \returns the maximum of all coefficients of \c *this. @@ -435,7 +435,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { - return derived().redux(Eigen::internal::scalar_max_op()); + return derived().redux(Eigen::internal::scalar_max_op()); } /** \returns the sum of all coefficients of \c *this diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 98fcebae5..c59147db4 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -111,21 +111,22 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() */ -template struct scalar_min_op { +template struct scalar_min_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmin(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const { return internal::predux_min(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMin + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = internal::is_same::value && packet_traits::HasMin }; }; @@ -134,21 +135,22 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() */ -template struct scalar_max_op { +template struct scalar_max_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { return internal::pmax(a,b); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const 
Packet& a) const { return internal::predux_max(a); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMax + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, + PacketAccess = internal::is_same::value && packet_traits::HasMax }; }; @@ -156,56 +158,56 @@ struct functor_traits > { * \brief Template functors for comparison of two scalars * \todo Implement packet-comparisons */ -template struct scalar_cmp_op; +template struct scalar_cmp_op; -template -struct functor_traits > { +template +struct functor_traits > { enum { - Cost = NumTraits::AddCost, + Cost = (NumTraits::AddCost+NumTraits::AddCost)/2, PacketAccess = false }; }; -template -struct result_of(Scalar,Scalar)> { +template +struct result_of(LhsScalar,RhsScalar)> { typedef bool type; }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);} }; -template struct scalar_cmp_op { +template struct scalar_cmp_op { typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;} }; @@ -214,7 +216,7 @@ template struct scalar_cmp_op { * * \sa MatrixBase::stableNorm(), class Redux */ -template struct scalar_hypot_op { +template struct scalar_hypot_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) // typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const @@ -235,7 +237,7 @@ template struct scalar_hypot_op { } }; template -struct functor_traits > { +struct functor_traits > { enum { Cost = 3 * NumTraits::AddCost + diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 045e22658..340d1f3a5 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -179,6 +179,8 @@ template struct scalar_sum_op; template struct scalar_difference_op; template struct scalar_conj_product_op; +template struct scalar_min_op; +template struct scalar_max_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; template struct scalar_real_op; @@ -201,8 +203,6 @@ template struct scalar_cube_op; template struct scalar_cast_op; template struct scalar_multiple_op; template struct scalar_quotient1_op; -template struct scalar_min_op; -template struct scalar_max_op; template struct scalar_random_op; template struct scalar_add_op; template struct scalar_constant_op; @@ -212,9 +212,10 @@ template struct scalar_igamma_op; template struct scalar_igammac_op; template struct scalar_betainc_op; +template struct scalar_hypot_op; template struct scalar_product_op; -template struct scalar_multiple2_op; template struct scalar_quotient_op; +template struct scalar_multiple2_op; template struct scalar_quotient2_op; } // end namespace internal diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 35547fdda..a51572463 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -876,15 +876,7 @@ namespace Eigen { #define EIGEN_IMPLIES(a,b) (!(a) || (b)) -#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \ - template \ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ - (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ - { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ - } - -// the expression type of a cwise product +// the expression type of a standard coefficient wise binary operation #define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ CwiseBinaryOp< \ EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \ @@ -895,6 +887,14 @@ namespace Eigen { const RHS \ > +#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \ + (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ + { \ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \ + } + #ifdef EIGEN_EXCEPTIONS # define EIGEN_THROW_X(X) throw X # define EIGEN_THROW throw diff --git a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index a9d6790d5..07a9ccf46 100644 --- a/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -327,13 +327,33 @@ GeneralizedEigenSolver::compute(const MatrixType& A, const MatrixTyp } else { - Scalar p = Scalar(0.5) * (m_matS.coeff(i, i) - m_matS.coeff(i+1, i+1)); - Scalar z = sqrt(abs(p * p + m_matS.coeff(i+1, i) * m_matS.coeff(i, i+1))); - m_alphas.coeffRef(i) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, z); - m_alphas.coeffRef(i+1) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, -z); + // We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a triangular 2x2 block T 
+ // From the eigen decomposition of T = U * E * U^-1, + // we can extract the eigenvalues of (U^-1 * S * U) / E + // Here, we can take advantage that E = diag(T), and U = [ 1 T_01 ; 0 T_11-T_00], and U^-1 = [1 -T_11/(T_11-T_00) ; 0 1/(T_11-T_00)]. + // Then taking beta=T_00*T_11*(T_11-T_00), we can avoid any division, and alpha is the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00) * (T_11-T_00): + + // T = [a b ; 0 c] + // S = [e f ; g h] + RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i, i+1), c = m_realQZ.matrixT().coeff(i+1, i+1); + RealScalar e = m_matS.coeff(i, i), f = m_matS.coeff(i, i+1), g = m_matS.coeff(i+1, i), h = m_matS.coeff(i+1, i+1); + RealScalar d = c-a; + RealScalar gb = g*b; + Matrix A; + A << (e*d-gb)*c, ((e*b+f*d-h*b)*d-gb*b)*a, + g*c , (gb+h*d)*a; + + // NOTE, we could also compute the SVD of T's block during the QZ factorization so that the respective T block is guaranteed to be diagonal, + // and then we could directly apply the formula below (while taking care of scaling S columns by T11,T00): + + Scalar p = Scalar(0.5) * (A.coeff(i, i) - A.coeff(i+1, i+1)); + Scalar z = sqrt(abs(p * p + A.coeff(i+1, i) * A.coeff(i, i+1))); + m_alphas.coeffRef(i) = ComplexScalar(A.coeff(i+1, i+1) + p, z); + m_alphas.coeffRef(i+1) = ComplexScalar(A.coeff(i+1, i+1) + p, -z); + + m_betas.coeffRef(i) = + m_betas.coeffRef(i+1) = a*c*d; - m_betas.coeffRef(i) = m_realQZ.matrixT().coeff(i,i); - m_betas.coeffRef(i+1) = m_realQZ.matrixT().coeff(i,i); i += 2; } } diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index c6ea12c90..da47a7ed7 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -29,14 +29,14 @@ operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * * \sa max() */ -EIGEN_MAKE_CWISE_BINARY_OP(min,internal::scalar_min_op) +EIGEN_MAKE_CWISE_BINARY_OP(min,min) /** \returns an expression of the coefficient-wise min of \c *this and scalar \a other * * \sa max() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > #ifdef EIGEN_PARSED_BY_DOXYGEN min @@ -55,14 +55,14 @@ min * * \sa min() */ -EIGEN_MAKE_CWISE_BINARY_OP(max,internal::scalar_max_op) +EIGEN_MAKE_CWISE_BINARY_OP(max,max) /** \returns an expression of the coefficient-wise max of \c *this and scalar \a other * * \sa min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > #ifdef EIGEN_PARSED_BY_DOXYGEN max @@ -95,13 +95,13 @@ pow(const ArrayBase& exponents) const // TODO code generating macros could be moved to Macros.h and could include generation of documentation #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \ template \ -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ }\ -typedef CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \ -typedef CwiseBinaryOp, const CwiseNullaryOp, PlainObject>, const Derived > RCmp ## COMPARATOR 
## ReturnType; \ +typedef CwiseBinaryOp, const Derived, const CwiseNullaryOp, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \ +typedef CwiseBinaryOp, const CwiseNullaryOp, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \ OP(const Scalar& s) const { \ return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \ @@ -113,10 +113,10 @@ OP(const Scalar& s, const Derived& d) { \ #define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \ template \ -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const OtherDerived, const Derived> \ +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp, const OtherDerived, const Derived> \ OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ { \ - return CwiseBinaryOp, const OtherDerived, const Derived>(other.derived(), derived()); \ + return CwiseBinaryOp, const OtherDerived, const Derived>(other.derived(), derived()); \ } \ EIGEN_DEVICE_FUNC \ inline const RCmp ## RCOMPARATOR ## ReturnType \ diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index 3c13b7f06..6a5ccc1aa 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -16,13 +16,14 @@ * * \sa class CwiseBinaryOp, operator-=() */ -template -EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference) -operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const -{ - return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference)(derived(), other.derived()); -} +EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference) +// template +// EIGEN_DEVICE_FUNC +// EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference) +// operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const +// { +// return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference)(derived(), other.derived()); +// } /** \returns an expression of the sum of \c *this and \a other * @@ -30,13 +31,14 @@ operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * * \sa class CwiseBinaryOp, operator+=() */ -template -EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum) -operator+(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const -{ - return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum)(derived(), other.derived()); -} +EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum) +// template +// EIGEN_DEVICE_FUNC +// EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum) +// operator+(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const +// { +// return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum)(derived(), other.derived()); +// } /** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other * diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h index 59581e618..f1084abef 100644 --- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h +++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h @@ -74,10 +74,10 @@ cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), 
other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of *this and scalar \a other @@ -85,7 +85,7 @@ cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * \sa class CwiseBinaryOp, min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMin(const Scalar &other) const { return cwiseMin(Derived::Constant(rows(), cols(), other)); @@ -100,10 +100,10 @@ cwiseMin(const Scalar &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise max of *this and scalar \a other @@ -111,7 +111,7 @@ cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const * \sa class CwiseBinaryOp, min() */ EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> cwiseMax(const Scalar &other) const { return cwiseMax(Derived::Constant(rows(), cols(), other)); @@ -133,7 +133,7 @@ cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } -typedef CwiseBinaryOp, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType; +typedef CwiseBinaryOp, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType; /** \returns an expression of the coefficient-wise == operator of \c *this and a scalar \a s * @@ -148,5 +148,5 @@ EIGEN_DEVICE_FUNC inline const CwiseScalarEqualReturnType cwiseEqual(const Scalar& s) const { - return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op()); + return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op()); } From 5fdd7036293aef88e5bdfe30acc74206486d82af Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 10 Jun 2016 15:58:04 +0200 Subject: [PATCH 41/85] Enable mixing types in numext::pow --- Eigen/src/Core/MathFunctions.h | 35 +++++++++-------------- Eigen/src/Core/functors/BinaryFunctors.h | 3 +- Eigen/src/Core/util/ForwardDeclarations.h | 1 + 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index ece04b754..2a05ae12d 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -494,24 +494,26 @@ struct log1p_retval * Implementation of pow * ****************************************************************************/ -template -struct pow_default_impl +template::IsInteger&&NumTraits::IsInteger> +struct pow_impl { - typedef Scalar retval; - static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) + //typedef Scalar retval; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { EIGEN_USING_STD_MATH(pow); return pow(x, y); } }; -template -struct pow_default_impl 
+template +struct pow_impl { - static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y) + typedef ScalarX result_type; + static EIGEN_DEVICE_FUNC inline ScalarX run(const ScalarX &x, const ScalarY &y) { - Scalar res(1); - eigen_assert(!NumTraits::IsSigned || y >= 0); + ScalarX res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); if(y & 1) res *= x; y >>= 1; while(y) @@ -524,15 +526,6 @@ struct pow_default_impl } }; -template -struct pow_impl : pow_default_impl::IsInteger> {}; - -template -struct pow_retval -{ - typedef Scalar type; -}; - /**************************************************************************** * Implementation of random * ****************************************************************************/ @@ -928,11 +921,11 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -template +template EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) +inline typename internal::pow_impl::result_type pow(const ScalarX& x, const ScalarY& y) { - return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); + return internal::pow_impl::run(x, y); } template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index c59147db4..ec06499a0 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -251,9 +251,10 @@ struct functor_traits > { * \brief Template functor to compute the pow of two scalars */ template struct scalar_binary_pow_op { + typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) EIGEN_DEVICE_FUNC - inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } + inline result_type operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } }; template struct functor_traits > { diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 340d1f3a5..760d6bcf6 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -217,6 +217,7 @@ template struct scalar_product_ template struct scalar_quotient_op; template struct scalar_multiple2_op; template struct scalar_quotient2_op; +template struct scalar_binary_pow_op; } // end namespace internal From 5de8d7036b8b48bc4986d96f677c77eb466ce02b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 10 Jun 2016 15:58:22 +0200 Subject: [PATCH 42/85] Add real.pow(complex), complex.pow(real) unit tests. 
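
[Editorial sketch, not part of the original commit] The snippet below illustrates, at the user level, what these new checks exercise: with the mixed-type pow support introduced in the previous patch, a real array can be raised to complex exponents and vice versa, the result scalar type being deduced through ScalarBinaryOpTraits. Variable names and sizes are illustrative only, and the code assumes an Eigen build that already contains this patch series (Eigen 3.3 or later).

    #include <Eigen/Core>
    #include <complex>
    #include <iostream>

    int main()
    {
      // Illustrative data: positive real bases and complex exponents.
      Eigen::ArrayXd  base = Eigen::ArrayXd::Random(4).abs() + 0.1;
      Eigen::ArrayXcd expo = Eigen::ArrayXcd::Random(4);

      // real.pow(complex) and complex.pow(real): both yield a complex-valued
      // array, mirroring the VERIFY_IS_APPROX checks added to mixingtypes.cpp.
      Eigen::ArrayXcd r1 = base.pow(expo);
      Eigen::ArrayXcd r2 = expo.pow(base);

      std::cout << r1.sum() + r2.sum() << std::endl;
      return 0;
    }
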
--- test/mixingtypes.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index 2af188b25..d719a3c53 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -196,6 +196,9 @@ template void mixingtypes(int size = SizeAtCompileType) // VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); + VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast().eval().array()) ); + rcd = mcd; VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); rcd = mcd; From fabae6c9a180669f6afbd732ee6973311a36476c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 10 Jun 2016 15:58:33 +0200 Subject: [PATCH 43/85] Cleanup --- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 12 ++---------- Eigen/src/plugins/CommonCwiseBinaryOps.h | 14 -------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index da47a7ed7..7f7d0dbf4 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -81,16 +81,8 @@ max * Example: \include Cwise_array_power_array.cpp * Output: \verbinclude Cwise_array_power_array.out */ -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -const CwiseBinaryOp, const Derived, const ExponentDerived> -pow(const ArrayBase& exponents) const -{ - return CwiseBinaryOp, const Derived, const ExponentDerived>( - this->derived(), - exponents.derived() - ); -} +EIGEN_MAKE_CWISE_BINARY_OP(pow,binary_pow) + // TODO code generating macros could be moved to Macros.h and could include generation of documentation #define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \ diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index 6a5ccc1aa..f16be7bea 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -17,13 +17,6 @@ * \sa class CwiseBinaryOp, operator-=() */ EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference) -// template -// EIGEN_DEVICE_FUNC -// EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference) -// operator-(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const -// { -// return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,difference)(derived(), other.derived()); -// } /** \returns an expression of the sum of \c *this and \a other * @@ -32,13 +25,6 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference) * \sa class CwiseBinaryOp, operator+=() */ EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum) -// template -// EIGEN_DEVICE_FUNC -// EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum) -// operator+(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const -// { -// return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,sum)(derived(), other.derived()); -// } /** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other * From a05607875a57129821f81e6aae1727357f4db5e6 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 10 Jun 2016 11:53:56 -0700 Subject: [PATCH 44/85] Don't refer to the half2 type unless it's been defined --- unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h index 0f3778e6e..fdb5ee6b8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -52,7 +52,7 @@ struct PacketType : internal::packet_traits { }; // For CUDA packet types when using a GpuDevice -#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16) template <> struct PacketType { typedef half2 type; From 83904a21c11ffdb88f3ad8a65ded7bf46c1a068a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 11 Jun 2016 14:41:36 +0200 Subject: [PATCH 45/85] Make sure T(i+1,i)==0 when diagonalizing T(i:i+1,i:i+1) --- Eigen/src/Eigenvalues/RealQZ.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Eigenvalues/RealQZ.h b/Eigen/src/Eigenvalues/RealQZ.h index c4715b954..b3a910dd9 100644 --- a/Eigen/src/Eigenvalues/RealQZ.h +++ b/Eigen/src/Eigenvalues/RealQZ.h @@ -630,11 +630,11 @@ namespace Eigen { internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right); // Apply resulting Jacobi rotations - m_T.applyOnTheLeft(i,i+1,j_left); - m_T.applyOnTheRight(i,i+1,j_right); m_S.applyOnTheLeft(i,i+1,j_left); m_S.applyOnTheRight(i,i+1,j_right); - m_T(i,i+1) = Scalar(0); + m_T.applyOnTheLeft(i,i+1,j_left); + m_T.applyOnTheRight(i,i+1,j_right); + m_T(i+1,i) = m_T(i,i+1) = Scalar(0); if(m_computeQZ) { m_Q.applyOnTheRight(i,i+1,j_left.transpose()); From a3a4714abac02ba48a683c3c3967cebee2833188 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 11 Jun 2016 14:41:53 +0200 Subject: [PATCH 46/85] Add debug output. --- test/real_qz.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/real_qz.cpp b/test/real_qz.cpp index a1766c6d9..45ae8d763 100644 --- a/test/real_qz.cpp +++ b/test/real_qz.cpp @@ -49,11 +49,20 @@ template void real_qz(const MatrixType& m) for (Index i=0; i void mixingtypes(int size = SizeAtCompileType) // check scalar products VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); - VERIFY_IS_APPROX(sd * vcd, complex(sd) * vcd); + VERIFY_IS_APPROX(sd * vcd , complex(sd) * vcd); VERIFY_IS_APPROX(vf * scf , vf.template cast >() * scf); - VERIFY_IS_APPROX(scd * vd, scd * vd.template cast >()); + VERIFY_IS_APPROX(scd * vd , scd * vd.template cast >()); + + // check scalar quotients + VERIFY_IS_APPROX(vcf / sf , vcf / complex(sf)); + VERIFY_IS_APPROX(vf / scf , vf.template cast >() / scf); // check dot product vf.dot(vf); From 7a9ef7bbb4d71e9ea5150a244238e62d350a8896 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 13 Jun 2016 16:17:23 +0200 Subject: [PATCH 49/85] Add default template parameters for the second scalar type of binary functors. This enhences backward compatibility. --- Eigen/src/Core/util/ForwardDeclarations.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 760d6bcf6..af3dce21b 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -176,11 +176,11 @@ namespace internal { // with optional conjugation of the arguments. 
template struct conj_helper; -template struct scalar_sum_op; -template struct scalar_difference_op; -template struct scalar_conj_product_op; -template struct scalar_min_op; -template struct scalar_max_op; +template struct scalar_sum_op; +template struct scalar_difference_op; +template struct scalar_conj_product_op; +template struct scalar_min_op; +template struct scalar_max_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; template struct scalar_real_op; From 3c12e241643ebea97706c9ed7412e9a7c1bf16d0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 13 Jun 2016 16:18:59 +0200 Subject: [PATCH 50/85] Add bind1st_op and bind2nd_op helpers to turn binary functors into unary ones, and implement scalar_multiple2 and scalar_quotient2 on top of them. --- Eigen/src/Core/functors/BinaryFunctors.h | 140 +++++++++++++----- Eigen/src/Core/util/ForwardDeclarations.h | 2 - Eigen/src/plugins/CommonCwiseUnaryOps.h | 23 ++- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 8 +- 4 files changed, 115 insertions(+), 58 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index ec06499a0..637514a20 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -16,12 +16,21 @@ namespace internal { //---------- associative binary functors ---------- +template +struct binary_op_base +{ + typedef Arg1 first_argument_type; + typedef Arg2 second_argument_type; +}; + /** \internal * \brief Template functor to compute the sum of two scalars * * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum() */ -template struct scalar_sum_op { +template +struct scalar_sum_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; } @@ -57,7 +66,9 @@ template<> struct scalar_sum_op : scalar_sum_op { * * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() */ -template struct scalar_product_op { +template +struct scalar_product_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } @@ -82,7 +93,9 @@ struct functor_traits > { * * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) */ -template struct scalar_conj_product_op { +template +struct scalar_conj_product_op : binary_op_base +{ enum { Conj = NumTraits::IsComplex @@ -111,7 +124,9 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() */ -template struct scalar_min_op { +template +struct scalar_min_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); } @@ -135,7 +150,9 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() */ -template struct scalar_max_op { +template +struct scalar_max_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; 
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); } @@ -174,37 +191,51 @@ struct result_of(LhsScalar,RhsScalar)> }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);} }; -template struct scalar_cmp_op { +template +struct scalar_cmp_op : binary_op_base +{ typedef bool result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;} @@ -216,7 +247,9 @@ template struct scalar_cmp_op struct scalar_hypot_op { +template +struct scalar_hypot_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) // typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const @@ -250,7 +283,9 @@ struct functor_traits > { /** \internal * \brief Template functor to compute the pow of two scalars */ -template struct scalar_binary_pow_op { +template +struct scalar_binary_pow_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) EIGEN_DEVICE_FUNC @@ -270,7 +305,9 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, MatrixBase::operator- */ -template struct scalar_difference_op { +template +struct scalar_difference_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; } @@ -291,7 +328,9 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, Cwise::operator/() */ -template struct scalar_quotient_op { +template +struct scalar_quotient_op : binary_op_base +{ typedef typename ScalarBinaryOpTraits::ReturnType result_type; EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } @@ -363,7 +402,8 @@ template<> struct functor_traits { * * \sa class CwiseBinaryOp, Cwise::igamma */ -template struct scalar_igamma_op { +template struct scalar_igamma_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { using numext::igamma; return igamma(a, x); @@ -388,7 +428,8 @@ struct functor_traits > { * * \sa class CwiseBinaryOp, Cwise::igammac */ -template struct scalar_igammac_op { +template struct scalar_igammac_op : binary_op_base +{ EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const { using numext::igammac; return igammac(a, x); @@ -411,6 +452,47 @@ struct functor_traits > { //---------- binary functors bound to a constant, thus appearing as a unary functor ---------- +// The following two classes permits to turn any binary functor into a unary one with one argument bound to a constant value. +// They are analogues to std::binder1st/binder2nd but with the following differences: +// - they are compatible with packetOp +// - they are portable across C++ versions (the std::binder* are deprecated in C++11) +template struct bind1st_op : BinaryOp { + + typedef typename BinaryOp::first_argument_type first_argument_type; + typedef typename BinaryOp::second_argument_type second_argument_type; + typedef typename BinaryOp::result_type result_type; + + bind1st_op(const first_argument_type &val) : m_value(val) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const + { return BinaryOp::packetOp(internal::pset1(m_value), b); } + + first_argument_type m_value; +}; +template struct functor_traits > : functor_traits {}; + + +template struct bind2nd_op : BinaryOp { + + typedef typename BinaryOp::first_argument_type first_argument_type; + typedef typename BinaryOp::second_argument_type second_argument_type; + typedef typename BinaryOp::result_type result_type; + + bind2nd_op(const second_argument_type &val) : m_value(val) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return BinaryOp::packetOp(a,internal::pset1(m_value)); } + + second_argument_type m_value; +}; +template struct functor_traits > : functor_traits {}; + /** \internal * \brief Template functor to multiply a scalar by a fixed other one * @@ -442,17 +524,6 @@ template struct functor_traits > { enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; -template -struct scalar_multiple2_op { - typedef typename ScalarBinaryOpTraits::ReturnType result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 
NumTraits::MulCost, PacketAccess = false }; }; /** \internal * \brief Template functor to divide a scalar by a fixed other one @@ -477,17 +548,6 @@ template struct functor_traits > { enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; -template -struct scalar_quotient2_op { - typedef typename ScalarBinaryOpTraits::ReturnType result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; /** \internal diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index af3dce21b..84d505d4a 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -215,8 +215,6 @@ template struct scalar_betainc_op; template struct scalar_hypot_op; template struct scalar_product_op; template struct scalar_quotient_op; -template struct scalar_multiple2_op; -template struct scalar_quotient2_op; template struct scalar_binary_pow_op; } // end namespace internal diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 927167aff..80dc46cd4 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -14,7 +14,6 @@ /** \internal Represents a scalar multiple of an expression */ typedef CwiseUnaryOp, const Derived> ScalarMultipleReturnType; -typedef CwiseUnaryOp >, const Derived> ScalarComplexMultipleReturnType; /** \internal Represents a quotient of an expression by a scalar*/ typedef CwiseUnaryOp, const Derived> ScalarQuotient1ReturnType; @@ -73,15 +72,15 @@ operator/(const Scalar& scalar) const /** Overloaded for efficiently multipling with compatible scalar types */ template EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type +typename internal::enable_if::Defined, + const CwiseUnaryOp >, const Derived> >::type operator*(const T& scalar) const { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #endif - return CwiseUnaryOp, const Derived>( - derived(), internal::scalar_multiple2_op(scalar) ); + typedef internal::bind2nd_op > op; + return CwiseUnaryOp(derived(), op(scalar) ); } EIGEN_DEVICE_FUNC @@ -91,28 +90,28 @@ operator*(const Scalar& scalar, const StorageBaseType& matrix) template EIGEN_DEVICE_FUNC inline friend -typename internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type +typename internal::enable_if::Defined, + const CwiseUnaryOp >, const Derived> >::type operator*(const T& scalar, const StorageBaseType& matrix) { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #endif - return CwiseUnaryOp, const Derived>( - matrix.derived(), internal::scalar_multiple2_op(scalar) ); + typedef internal::bind1st_op > op; + return CwiseUnaryOp(matrix.derived(), op(scalar) ); } template EIGEN_DEVICE_FUNC inline typename internal::enable_if::Defined, - const CwiseUnaryOp, const Derived> >::type + const CwiseUnaryOp >, const Derived> >::type operator/(const T& scalar) const { #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN 
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #endif - return CwiseUnaryOp, const Derived>( - derived(), internal::scalar_quotient2_op(scalar) ); + typedef internal::bind2nd_op > op; + return CwiseUnaryOp(derived(), op(scalar) ); } template struct CastXpr { typedef typename internal::cast_return_type, const Derived> >::type Type; }; diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index ba61288a3..0ed91fdb7 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -426,18 +426,18 @@ struct auto_diff_special_op<_DerType, true> } - inline const AutoDiffScalar, DerType>::Type > + inline const AutoDiffScalar >, DerType>::Type > operator*(const Real& other) const { - return AutoDiffScalar, DerType>::Type >( + return AutoDiffScalar >, DerType>::Type >( derived().value() * other, derived().derivatives() * other); } - friend inline const AutoDiffScalar, DerType>::Type > + friend inline const AutoDiffScalar >, DerType>::Type > operator*(const Real& other, const AutoDiffScalar<_DerType>& a) { - return AutoDiffScalar, DerType>::Type >( + return AutoDiffScalar >, DerType>::Type >( a.value() * other, a.derivatives() * other); } From 39781dc1e230a2ece36b484dbef76f66d2136f86 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:03:26 +0200 Subject: [PATCH 51/85] Fix compilation of evaluator unit test --- test/evaluators.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/evaluators.cpp b/test/evaluators.cpp index 876dffe22..aed5a05a7 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -21,7 +21,7 @@ namespace Eigen { EIGEN_STRONG_INLINE DstXprType& copy_using_evaluator(const EigenBase &dst, const SrcXprType &src) { - call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); return dst.const_cast_derived(); } @@ -29,7 +29,7 @@ namespace Eigen { EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const NoAlias& dst, const SrcXprType &src) { - call_assignment(dst, src.derived(), internal::assign_op()); + call_assignment(dst, src.derived(), internal::assign_op()); return dst.expression(); } @@ -45,7 +45,7 @@ namespace Eigen { dst.const_cast_derived().resizeLike(src.derived()); #endif - call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); return dst.const_cast_derived(); } @@ -53,28 +53,28 @@ namespace Eigen { void add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); + call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); } template void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); + call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); } template void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); } template void 
divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) { typedef typename DstXprType::Scalar Scalar; - call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); + call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); } template From 64fcfd314f594bbc358aa8e2b8dc8def9b288b66 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:26:57 +0200 Subject: [PATCH 52/85] Implement scalar multiples and division by a scalar as a binary-expression with a constant expression. This slightly complexifies the type of the expressions and implies that we now have to distinguish between scalar*expr and expr*scalar to catch scalar-multiple expression (e.g., see BlasUtil.h), but this brings several advantages: - it makes it clear on each side the scalar is applied, - it clearly reflects that we are dealing with a binary-expression, - the complexity of the type is hidden through macros defined at the end of Macros.h, - distinguishing between "scalar op expr" and "expr op scalar" is important to support non commutative fields (like quaternions) - "scalar op expr" is now fully equivalent to "ConstantExpr(scalar) op expr" - scalar_multiple_op, scalar_quotient1_op and scalar_quotient2_op are not used anymore in officially supported modules (still used in Tensor) --- Eigen/src/Core/DiagonalMatrix.h | 9 ++- Eigen/src/Core/ProductEvaluators.h | 37 +++++++----- Eigen/src/Core/SelfAdjointView.h | 2 +- Eigen/src/Core/util/BlasUtil.h | 24 ++++++++ Eigen/src/Core/util/ForwardDeclarations.h | 2 - Eigen/src/Core/util/Macros.h | 45 ++++++++++++++ Eigen/src/Core/util/XprHelper.h | 14 +++++ Eigen/src/Geometry/AlignedBox.h | 8 +-- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 1 + Eigen/src/plugins/CommonCwiseBinaryOps.h | 30 ++++++++++ Eigen/src/plugins/CommonCwiseUnaryOps.h | 71 ----------------------- test/array.cpp | 2 +- 12 files changed, 146 insertions(+), 99 deletions(-) diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index aa619dd5c..d6f89bced 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -71,18 +71,17 @@ class DiagonalBase : public EigenBase return InverseReturnType(diagonal().cwiseInverse()); } - typedef DiagonalWrapper, const DiagonalVectorType> > ScalarMultipleReturnType; EIGEN_DEVICE_FUNC - inline const ScalarMultipleReturnType + inline const DiagonalWrapper operator*(const Scalar& scalar) const { - return ScalarMultipleReturnType(diagonal() * scalar); + return DiagonalWrapper(diagonal() * scalar); } EIGEN_DEVICE_FUNC - friend inline const ScalarMultipleReturnType + friend inline const DiagonalWrapper operator*(const Scalar& scalar, const DiagonalBase& other) { - return ScalarMultipleReturnType(other.diagonal() * scalar); + return DiagonalWrapper(scalar * other.diagonal()); } }; diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 71ae6e54c..77549e709 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -35,22 +35,28 @@ struct evaluator > EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} }; -// Catch scalar * ( A * B ) and transform it to (A*scalar) * B +// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" // TODO we should apply that rule only if that's really helpful -template -struct evaluator_assume_aliasing, const Product > > +template +struct evaluator_assume_aliasing, + const CwiseNullaryOp, Plain1>, + const Product > > { static const bool 
value = true; }; -template -struct evaluator, const Product > > - : public evaluator,const Lhs>, Rhs, DefaultProduct> > +template +struct evaluator, + const CwiseNullaryOp, Plain1>, + const Product > > + : public evaluator > { - typedef CwiseUnaryOp, const Product > XprType; - typedef evaluator,const Lhs>, Rhs, DefaultProduct> > Base; - + typedef CwiseBinaryOp, + const CwiseNullaryOp, Plain1>, + const Product > XprType; + typedef evaluator > Base; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) - : Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs()) + : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} }; @@ -171,16 +177,17 @@ struct Assignment, internal::sub_assign_op< // Dense ?= scalar * Product // TODO we should apply that rule if that's really helpful // for instance, this is not good for inner products -template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis> -struct Assignment, +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> +struct Assignment, const CwiseNullaryOp,Plain>, const Product >, AssignFunc, Dense2Dense, Scalar> { - typedef CwiseUnaryOp, - const Product > SrcXprType; + typedef CwiseBinaryOp, + const CwiseNullaryOp,Plain>, + const Product > SrcXprType; static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) { - call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func); + call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); } }; diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 92c541f08..62d4180da 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -129,7 +129,7 @@ template class SelfAdjointView } friend EIGEN_DEVICE_FUNC - const SelfAdjointView,MatrixType>,UpLo> + const SelfAdjointView operator*(const Scalar& s, const SelfAdjointView& mat) { return (s*mat.nestedExpression()).template selfadjointView(); diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index c163f1458..7e8f90d88 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -293,6 +293,30 @@ struct blas_traits, NestedXpr> > }; // pop scalar multiple +template +struct blas_traits, const CwiseNullaryOp,Plain>, NestedXpr> > + : blas_traits +{ + typedef blas_traits Base; + typedef CwiseBinaryOp, const CwiseNullaryOp,Plain>, NestedXpr> XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); } +}; +template +struct blas_traits, NestedXpr, const CwiseNullaryOp,Plain> > > + : blas_traits +{ + typedef blas_traits Base; + typedef CwiseBinaryOp, NestedXpr, const CwiseNullaryOp,Plain> > XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; } +}; + +// pop scalar multiple (using deprecated scalar_multiple_op) template struct blas_traits, NestedXpr> > : blas_traits diff --git 
a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 84d505d4a..8334446d6 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -201,8 +201,6 @@ template struct scalar_inverse_op; template struct scalar_square_op; template struct scalar_cube_op; template struct scalar_cast_op; -template struct scalar_multiple_op; -template struct scalar_quotient1_op; template struct scalar_random_op; template struct scalar_add_op; template struct scalar_constant_op; diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index a51572463..f3c6512c9 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -895,6 +895,51 @@ namespace Eigen { return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \ } +#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \ + CwiseBinaryOp::Scalar,SCALAR>, const EXPR, \ + const typename internal::plain_constant_type::type> + +#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \ + CwiseBinaryOp::Scalar>, \ + const typename internal::plain_constant_type::type, const EXPR> + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ + EIGEN_DEVICE_FUNC inline \ + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,OPNAME) \ + (METHOD)(const Scalar& scalar) const { \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), scalar)); \ + } \ + \ + template EIGEN_DEVICE_FUNC inline \ + typename internal::enable_if::Defined, \ + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME) >::type \ + (METHOD)(const T& scalar) const { \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), scalar)); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + EIGEN_DEVICE_FUNC inline friend \ + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,OPNAME) \ + (METHOD)(const Scalar& scalar, const StorageBaseType& matrix) { \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), scalar), matrix.derived()); \ + } \ + \ + template EIGEN_DEVICE_FUNC inline friend \ + typename internal::enable_if::Defined, \ + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME) >::type \ + (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), scalar), matrix.derived()); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) + + #ifdef EIGEN_EXCEPTIONS # define EIGEN_THROW_X(X) throw X # define EIGEN_THROW throw diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 828813161..c41c408b0 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -576,6 +576,20 @@ struct plain_diag_type >::type type; }; +template +struct plain_constant_type +{ + enum { Options = (traits::Flags&RowMajorBit)?RowMajor:0 }; + + typedef Array::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, 
traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> array_type; + + typedef Matrix::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> matrix_type; + + typedef CwiseNullaryOp, const typename conditional::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type; +}; + template struct is_lvalue { diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index aeb043a6c..d20d17492 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -36,8 +36,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) typedef NumTraits ScalarTraits; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 typedef typename ScalarTraits::Real RealScalar; - typedef typename ScalarTraits::NonInteger NonInteger; + typedef typename ScalarTraits::NonInteger NonInteger; typedef Matrix VectorType; + typedef CwiseBinaryOp, const VectorType, const VectorType> VectorTypeSum; /** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */ enum CornerType @@ -111,10 +112,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) inline VectorType& (max)() { return m_max; } /** \returns the center of the box */ - inline const CwiseUnaryOp, - const CwiseBinaryOp, const VectorType, const VectorType> > + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient) center() const - { return (m_min+m_max)/2; } + { return (m_min+m_max)/RealScalar(2); } /** \returns the lengths of the sides of the bounding box. * Note that this function does not get the same diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 7f7d0dbf4..351762e82 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -1,3 +1,4 @@ + /** \returns an expression of the coefficient wise product of \c *this and \a other * * \sa MatrixBase::cwiseProduct diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index f16be7bea..c5eaea16d 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -45,3 +45,33 @@ binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other, const Cu return CwiseBinaryOp(derived(), other.derived(), func); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product); +#else +/** \returns an expression of \c *this scaled by the scalar factor \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template +const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; +/** \returns an expression of \c *this scaled by the scalar factor \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator*(const T& scalar, const StorageBaseType& expr); +#endif + + + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient); +#else +/** \returns an expression of \c *this divided by the scalar value \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
+ */ +template +const CwiseBinaryOp,Derived,Constant > operator/(const T& scalar) const; +#endif diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h index 80dc46cd4..6cd5479a0 100644 --- a/Eigen/src/plugins/CommonCwiseUnaryOps.h +++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h @@ -12,11 +12,6 @@ #ifndef EIGEN_PARSED_BY_DOXYGEN -/** \internal Represents a scalar multiple of an expression */ -typedef CwiseUnaryOp, const Derived> ScalarMultipleReturnType; - -/** \internal Represents a quotient of an expression by a scalar*/ -typedef CwiseUnaryOp, const Derived> ScalarQuotient1ReturnType; /** \internal the return type of conjugate() */ typedef typename internal::conditional::IsComplex, const CwiseUnaryOp, const Derived>, @@ -38,7 +33,6 @@ typedef CwiseUnaryOp, const Derived> ImagReturn typedef CwiseUnaryView, Derived> NonConstImagReturnType; typedef CwiseUnaryOp, const Derived> NegativeReturnType; -//typedef CwiseUnaryOp, const Derived> #endif // not EIGEN_PARSED_BY_DOXYGEN @@ -49,71 +43,6 @@ inline const NegativeReturnType operator-() const { return NegativeReturnType(derived()); } -/** \returns an expression of \c *this scaled by the scalar factor \a scalar */ -EIGEN_DEVICE_FUNC -inline const ScalarMultipleReturnType -operator*(const Scalar& scalar) const -{ - return ScalarMultipleReturnType(derived(), internal::scalar_multiple_op(scalar)); -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN -const ScalarMultipleReturnType operator*(const RealScalar& scalar) const; -#endif - -/** \returns an expression of \c *this divided by the scalar value \a scalar */ -EIGEN_DEVICE_FUNC -inline const ScalarQuotient1ReturnType -operator/(const Scalar& scalar) const -{ - return ScalarQuotient1ReturnType(derived(), internal::scalar_quotient1_op(scalar)); -} - -/** Overloaded for efficiently multipling with compatible scalar types */ -template -EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, - const CwiseUnaryOp >, const Derived> >::type -operator*(const T& scalar) const -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - typedef internal::bind2nd_op > op; - return CwiseUnaryOp(derived(), op(scalar) ); -} - -EIGEN_DEVICE_FUNC -inline friend const ScalarMultipleReturnType -operator*(const Scalar& scalar, const StorageBaseType& matrix) -{ return matrix*scalar; } - -template -EIGEN_DEVICE_FUNC inline friend -typename internal::enable_if::Defined, - const CwiseUnaryOp >, const Derived> >::type -operator*(const T& scalar, const StorageBaseType& matrix) -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - typedef internal::bind1st_op > op; - return CwiseUnaryOp(matrix.derived(), op(scalar) ); -} - -template -EIGEN_DEVICE_FUNC inline -typename internal::enable_if::Defined, - const CwiseUnaryOp >, const Derived> >::type -operator/(const T& scalar) const -{ -#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN - EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN -#endif - typedef internal::bind2nd_op > op; - return CwiseUnaryOp(derived(), op(scalar) ); -} - template struct CastXpr { typedef typename internal::cast_return_type, const Derived> >::type Type; }; /** \returns an expression of *this with the \a Scalar type casted to diff --git a/test/array.cpp b/test/array.cpp index bd470d5f7..0416ec5d2 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -807,7 +807,7 @@ void test_array() VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, int >::value)); VERIFY((internal::is_same< 
internal::global_math_functions_filtering_base::type, float >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); - typedef CwiseUnaryOp, ArrayXd > Xpr; + typedef CwiseUnaryOp, ArrayXd > Xpr; VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); From deb8306e60c53f052d1df16dce38a4b6c6c98aab Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:28:03 +0200 Subject: [PATCH 53/85] Move MatrixBase::operaotr*(UniformScaling) as a free function in Scaling.h, and fix return type. --- Eigen/src/Core/MatrixBase.h | 4 +--- Eigen/src/Geometry/Scaling.h | 11 +++++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index f63505fef..d9d2426ad 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -403,7 +403,6 @@ template class MatrixBase inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; - inline ScalarMultipleReturnType operator*(const UniformScaling& s) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) : ColsAtCompileTime==1 ? Vertical : Horizontal }; @@ -416,8 +415,7 @@ template class MatrixBase typedef Block::ColsAtCompileTime==1 ? SizeMinusOne : 1, internal::traits::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne; - typedef CwiseUnaryOp::Scalar>, - const ConstStartMinusOne > HNormalizedReturnType; + typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType; inline const HNormalizedReturnType hnormalized() const; diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h index 643138199..3e12681b0 100644 --- a/Eigen/src/Geometry/Scaling.h +++ b/Eigen/src/Geometry/Scaling.h @@ -107,12 +107,15 @@ public: /** \addtogroup Geometry_Module */ //@{ -/** Concatenates a linear transformation matrix and a uniform scaling */ +/** Concatenates a linear transformation matrix and a uniform scaling + * \relates UniformScaling + */ // NOTE this operator is defiend in MatrixBase and not as a friend function // of UniformScaling to fix an internal crash of Intel's ICC -template typename MatrixBase::ScalarMultipleReturnType -MatrixBase::operator*(const UniformScaling& s) const -{ return derived() * s.factor(); } +template +EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,product) +operator*(const MatrixBase& matrix, const UniformScaling& s) +{ return matrix.derived() * s.factor(); } /** Constructs a uniform scaling from scale factor \a s */ static inline UniformScaling Scaling(float s) { return UniformScaling(s); } From f5b1c7394537cc74978617383628c3b6b9399e57 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:29:06 +0200 Subject: [PATCH 54/85] Set cost of constant expression to 0 (the cost should be amortized through the expression) --- Eigen/src/Core/functors/NullaryFunctors.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 78cc22277..eaa582f23 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -26,7 +26,8 @@ struct scalar_constant_op { }; template struct functor_traits > -{ enum { Cost = 1, PacketAccess = 
packet_traits::Vectorizable, IsRepeatable = true }; }; +{ enum { Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */, + PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; template struct scalar_identity_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) From f57fd78e308c80d25b1f8c92ae8db2bdc5045a29 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:29:54 +0200 Subject: [PATCH 55/85] Generalize coeff-wise sparse products to support different scalar types --- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 27 ++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index dd21eb8c5..aad7b7d79 100644 --- a/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -28,6 +28,9 @@ namespace Eigen { // generic sparse // 4 - dense op dense product dense // generic dense +// +// TODO to ease compiler job, we could specialize product/quotient with a scalar +// and fallback to cwise-unary evaluator using bind1st_op and bind2nd_op. template class CwiseBinaryOpImpl @@ -323,12 +326,12 @@ protected: }; // "sparse .* sparse" -template -struct binary_evaluator, Lhs, Rhs>, IteratorBased, IteratorBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IteratorBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef typename evaluator::InnerIterator LhsIterator; typedef typename evaluator::InnerIterator RhsIterator; typedef CwiseBinaryOp XprType; @@ -407,12 +410,12 @@ protected: }; // "dense .* sparse" -template -struct binary_evaluator, Lhs, Rhs>, IndexBased, IteratorBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IndexBased, IteratorBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef evaluator LhsEvaluator; typedef typename evaluator::InnerIterator RhsIterator; typedef CwiseBinaryOp XprType; @@ -480,12 +483,12 @@ protected: }; // "sparse .* dense" -template -struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> - : evaluator_base, Lhs, Rhs> > +template +struct binary_evaluator, Lhs, Rhs>, IteratorBased, IndexBased> + : evaluator_base, Lhs, Rhs> > { protected: - typedef scalar_product_op BinaryOp; + typedef scalar_product_op BinaryOp; typedef typename evaluator::InnerIterator LhsIterator; typedef evaluator RhsEvaluator; typedef CwiseBinaryOp XprType; From bcc0f38f981f8efadeba314fb41e58f911c60ccc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:31:27 +0200 Subject: [PATCH 56/85] Add unittesting plugins to scalar_product_op and scalar_quotient_op to help chaking that types are properly propagated. 
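
Illustrative sketch only (not part of this patch): it shows how a test can hook the EIGEN_SCALAR_BINARY_OP_PLUGIN macro, in the same way test/linearstructure.cpp does below, to detect when a product or quotient functor gets instantiated with mixed scalar types. The flag name g_called mirrors that test; any other global would work as well.

    static bool g_called;
    #define EIGEN_SCALAR_BINARY_OP_PLUGIN \
      { g_called |= (!internal::is_same<LhsScalar,RhsScalar>::value); }
    #include "main.h"   // Eigen must be included after the plugin is defined

    // inside a test body, assuming MatrixXcd m and double s:
    //   g_called = false;
    //   VERIFY_IS_APPROX(m*s, m*std::complex<double>(s));
    //   VERIFY(g_called && "matrix * real not properly optimized");
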
--- Eigen/src/Core/functors/BinaryFunctors.h | 12 ++++++++++++ test/linearstructure.cpp | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 637514a20..77e9e6e93 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -70,7 +70,13 @@ template struct scalar_product_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) +#else + scalar_product_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -332,7 +338,13 @@ template struct scalar_quotient_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) +#else + scalar_quotient_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index e7f4b3dc5..7eef976d2 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -9,7 +9,7 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. static bool g_called; -#define EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN { g_called = true; } +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } #include "main.h" From 12350d3ac7a1192407b0f920bd937d4f753ec118 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:31:52 +0200 Subject: [PATCH 57/85] Add unit test for AlignedBox::center --- test/geo_alignedbox.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/geo_alignedbox.cpp b/test/geo_alignedbox.cpp index 2bdb4b7f2..ba3378aab 100644 --- a/test/geo_alignedbox.cpp +++ b/test/geo_alignedbox.cpp @@ -48,6 +48,8 @@ template void alignedbox(const BoxType& _box) b0.extend(p0); b0.extend(p1); VERIFY(b0.contains(p0*s1+(Scalar(1)-s1)*p1)); + VERIFY(b0.contains(b0.center())); + VERIFY(b0.center()==(p0+p1)/Scalar(2)); (b2 = b0).extend(b1); VERIFY(b2.contains(b0)); From f925dba3d981829fce7b383bec4d52fe41a023a4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 11:32:09 +0200 Subject: [PATCH 58/85] Fix compilation of BVH example --- unsupported/doc/examples/BVH_Example.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/unsupported/doc/examples/BVH_Example.cpp b/unsupported/doc/examples/BVH_Example.cpp index 6b6fac075..afb0c94c2 100644 --- a/unsupported/doc/examples/BVH_Example.cpp +++ b/unsupported/doc/examples/BVH_Example.cpp @@ -6,9 +6,7 @@ using namespace Eigen; typedef AlignedBox Box2d; namespace Eigen { - namespace internal { - Box2d bounding_box(const Vector2d &v) { return Box2d(v, v); } //compute the bounding box of a single point - } + Box2d bounding_box(const Vector2d &v) { return Box2d(v, v); } //compute the bounding box of a single point } struct PointPointMinimizer //how to compute squared distances between points and rectangles From 756ac4a93dea57bf61079c4867e712a8c26d77f6 Mon Sep 17 
00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 12:03:39 +0200 Subject: [PATCH 59/85] Fix doc. --- Eigen/src/plugins/CommonCwiseBinaryOps.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index c5eaea16d..03c4aac94 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -55,7 +55,7 @@ EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product); */ template const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; -/** \returns an expression of \c *this scaled by the scalar factor \a scalar +/** \returns an expression of \a expr scaled by the scalar factor \a scalar * * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. */ From a8c08e8b8e8bdd486a5a27b9f1e92c48ef4361cd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 12:06:10 +0200 Subject: [PATCH 60/85] Implement expr+scalar, scalar+expr, expr-scalar, and scalar-expr as binary expressions, and generalize supported scalar types. The following functors are now deprecated: scalar_add_op, scalar_sub_op, and scalar_rsub_op. --- Eigen/src/Core/functors/BinaryFunctors.h | 31 ++++++------ Eigen/src/Core/util/ForwardDeclarations.h | 1 - Eigen/src/plugins/ArrayCwiseBinaryOps.h | 57 ++++++++++++----------- test/linearstructure.cpp | 16 +++++++ test/mixingtypes.cpp | 12 +++++ 5 files changed, 70 insertions(+), 47 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 77e9e6e93..fd7b5f8b4 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -32,7 +32,13 @@ template struct scalar_sum_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) +#else + scalar_sum_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -315,7 +321,13 @@ template struct scalar_difference_op : binary_op_base { typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) +#else + scalar_difference_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const @@ -584,7 +596,7 @@ struct functor_traits > /** \internal * \brief Template functor to subtract a fixed scalar to another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op + * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op */ template struct scalar_sub_op { @@ -600,23 +612,6 @@ template struct functor_traits > { enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; -/** \internal - * \brief Template functor to subtract a scalar to fixed another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op - */ -template -struct scalar_rsub_op { - EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) 
: m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::psub(pset1(m_other), a); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; /** \internal * \brief Template functor to raise a scalar to a power diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 8334446d6..c1295dc5f 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -202,7 +202,6 @@ template struct scalar_square_op; template struct scalar_cube_op; template struct scalar_cast_op; template struct scalar_random_op; -template struct scalar_add_op; template struct scalar_constant_op; template struct scalar_identity_op; template struct scalar_sign_op; diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 351762e82..8bf4e9b18 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -192,48 +192,49 @@ EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ) #undef EIGEN_MAKE_CWISE_COMP_R_OP // scalar addition - +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum); +#else /** \returns an expression of \c *this with each coeff incremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. * * Example: \include Cwise_plus.cpp * Output: \verbinclude Cwise_plus.out * * \sa operator+=(), operator-() */ -EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp, const Derived> -operator+(const Scalar& scalar) const -{ - return CwiseUnaryOp, const Derived>(derived(), internal::scalar_add_op(scalar)); -} - -EIGEN_DEVICE_FUNC -friend inline const CwiseUnaryOp, const Derived> -operator+(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS& other) -{ - return other + scalar; -} +template +const CwiseBinaryOp,Derived,Constant > operator+(const T& scalar) const; +/** \returns an expression of \a expr with each coeff incremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator+(const T& scalar, const StorageBaseType& expr); +#endif +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference); +#else /** \returns an expression of \c *this with each coeff decremented by the constant \a scalar + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
* * Example: \include Cwise_minus.cpp * Output: \verbinclude Cwise_minus.out * - * \sa operator+(), operator-=() + * \sa operator+=(), operator-() */ -EIGEN_DEVICE_FUNC -inline const CwiseUnaryOp, const Derived> -operator-(const Scalar& scalar) const -{ - return CwiseUnaryOp, const Derived>(derived(), internal::scalar_sub_op(scalar));; -} - -EIGEN_DEVICE_FUNC -friend inline const CwiseUnaryOp, const Derived> -operator-(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS& other) -{ - return CwiseUnaryOp, const Derived>(other.derived(), internal::scalar_rsub_op(scalar));; -} +template +const CwiseBinaryOp,Derived,Constant > operator-(const T& scalar) const; +/** \returns an expression of the constant matrix of value \a scalar decremented by the coefficients of \a expr + * + * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. + */ +template friend +const CwiseBinaryOp,Constant,Derived> operator-(const T& scalar, const StorageBaseType& expr); +#endif /** \returns an expression of the coefficient-wise && operator of *this and \a other * diff --git a/test/linearstructure.cpp b/test/linearstructure.cpp index 7eef976d2..17474af10 100644 --- a/test/linearstructure.cpp +++ b/test/linearstructure.cpp @@ -93,6 +93,22 @@ template void real_complex(DenseIndex rows = MatrixType::Ro g_called = false; VERIFY_IS_APPROX(m1/s, m1/Scalar(s)); VERIFY(g_called && "matrix / real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s+m1.array(), Scalar(s)+m1.array()); + VERIFY(g_called && "real + matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()+s, m1.array()+Scalar(s)); + VERIFY(g_called && "matrix + real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s-m1.array(), Scalar(s)-m1.array()); + VERIFY(g_called && "real - matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()-s, m1.array()-Scalar(s)); + VERIFY(g_called && "matrix - real not properly optimized"); } void test_linearstructure() diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index dee4f35df..cf2207114 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -75,6 +75,18 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX(vcf / sf , vcf / complex(sf)); VERIFY_IS_APPROX(vf / scf , vf.template cast >() / scf); + // check scalar increment + VERIFY_IS_APPROX(vcf.array() + sf , vcf.array() + complex(sf)); + VERIFY_IS_APPROX(sd + vcd.array(), complex(sd) + vcd.array()); + VERIFY_IS_APPROX(vf.array() + scf, vf.template cast >().array() + scf); + VERIFY_IS_APPROX(scd + vd.array() , scd + vd.template cast >().array()); + + // check scalar subtractions + VERIFY_IS_APPROX(vcf.array() - sf , vcf.array() - complex(sf)); + VERIFY_IS_APPROX(sd - vcd.array(), complex(sd) - vcd.array()); + VERIFY_IS_APPROX(vf.array() - scf, vf.template cast >().array() - scf); + VERIFY_IS_APPROX(scd - vd.array() , scd - vd.template cast >().array()); + // check dot product vf.dot(vf); #if 0 // we get other compilation errors here than just static asserts From a9bb653a684b55fca1c9f58089f94871484ae50c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 12:07:00 +0200 Subject: [PATCH 61/85] Update doc (scalar_add_op is now deprecated) --- doc/CustomizingEigen.dox | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/CustomizingEigen.dox b/doc/CustomizingEigen.dox index cb25f4ec9..607f86658 100644 --- a/doc/CustomizingEigen.dox +++ 
b/doc/CustomizingEigen.dox @@ -56,13 +56,13 @@ void makeFloor(const MatrixBase& other) { derived() = derived().cw template void makeCeil(const MatrixBase& other) { derived() = derived().cwiseMax(other.derived()); } -const CwiseUnaryOp, Derived> +const CwiseBinaryOp, const Derived, const ConstantReturnType> operator+(const Scalar& scalar) const -{ return CwiseUnaryOp, Derived>(derived(), internal::scalar_add_op(scalar)); } +{ return CwiseBinaryOp, const Derived, const ConstantReturnType>(derived(), Constant(rows(),cols(),scalar)); } -friend const CwiseUnaryOp, Derived> +friend const CwiseBinaryOp, const ConstantReturnType, Derived> operator+(const Scalar& scalar, const MatrixBase& mat) -{ return CwiseUnaryOp, Derived>(mat.derived(), internal::scalar_add_op(scalar)); } +{ return CwiseBinaryOp, const ConstantReturnType, Derived>(Constant(rows(),cols(),scalar), mat.derived()); } \endcode Then one can the following declaration in the config.h or whatever prerequisites header file of his project: From 396d9cfb6eaa62ecafc6f2d02851f0d2f8d6b4ef Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 14:10:07 +0200 Subject: [PATCH 62/85] Generalize expr.pow(scalar), pow(expr,scalar) and pow(scalar,expr). Internal: scalar_pow_op (unary) is removed, and scalar_binary_pow_op is renamed scalar_pow_op. --- Eigen/src/Core/GlobalFunctions.h | 60 ++++++++++++++++++----- Eigen/src/Core/MathFunctions.h | 2 +- Eigen/src/Core/functors/BinaryFunctors.h | 39 ++++++--------- Eigen/src/Core/util/ForwardDeclarations.h | 3 +- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 21 +++++++- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 19 ------- test/mixingtypes.cpp | 47 ++++++++++++------ 7 files changed, 118 insertions(+), 73 deletions(-) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index e489cefec..60e2ccfed 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -89,14 +89,32 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign) /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. + * + * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar). * * \sa ArrayBase::pow() + * + * \relates ArrayBase */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Derived,Constant > + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) + && ScalarBinaryOpTraits::Defined, + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) { + return x.derived().pow(exponent); + } + template - inline const Eigen::CwiseUnaryOp, const Derived> + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow) pow(const Eigen::ArrayBase& x, const typename Derived::Scalar& exponent) { return x.derived().pow(exponent); } +#endif /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. 
* @@ -106,12 +124,14 @@ namespace Eigen * Output: \verbinclude Cwise_array_power_array.out * * \sa ArrayBase::pow() + * + * \relates ArrayBase */ template - inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { - return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( x.derived(), exponents.derived() ); @@ -120,23 +140,39 @@ namespace Eigen /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. * * This function computes the coefficient-wise power between a scalar and an array of exponents. - * Beaware that the scalar type of the input scalar \a x and the exponents \a exponents must be the same. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). * * Example: \include Cwise_scalar_power_array.cpp * Output: \verbinclude Cwise_scalar_power_array.out * * \sa ArrayBase::pow() + * + * \relates ArrayBase */ - template - inline const Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived> - pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Constant,Derived> + pow(const Scalar& x,const Eigen::ArrayBase& x); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) + && ScalarBinaryOpTraits::Defined, + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type + pow(const Scalar& x, const Eigen::ArrayBase& exponents) { - typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x); - return Eigen::CwiseBinaryOp, const typename Derived::ConstantReturnType, const Derived>( - constant_x, - exponents.derived() - ); + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); } + + template + inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow) + pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); + } +#endif /** * \brief Component-wise division of a scalar by array elements. 
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 2a05ae12d..b683effab 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -498,7 +498,7 @@ template: struct pow_impl { //typedef Scalar retval; - typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { EIGEN_USING_STD_MATH(pow); diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index fd7b5f8b4..9e40303be 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -295,16 +295,24 @@ struct functor_traits > { /** \internal * \brief Template functor to compute the pow of two scalars */ -template -struct scalar_binary_pow_op : binary_op_base +template +struct scalar_pow_op : binary_op_base { - typedef typename ScalarBinaryOpTraits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) + typedef typename ScalarBinaryOpTraits::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN + EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op) +#else + scalar_pow_op() { + typedef Scalar LhsScalar; + typedef Exponent RhsScalar; + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif EIGEN_DEVICE_FUNC - inline result_type operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } + inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); } }; -template -struct functor_traits > { +template +struct functor_traits > { enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; @@ -613,23 +621,6 @@ struct functor_traits > { enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; -/** \internal - * \brief Template functor to raise a scalar to a power - * \sa class CwiseUnaryOp, Cwise::pow - */ -template -struct scalar_pow_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} - EIGEN_DEVICE_FUNC - inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } - const Scalar m_exponent; -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; - /** \internal * \brief Template functor to compute the quotient between a scalar and array entries. 
* \sa class CwiseUnaryOp, Cwise::inverse() diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index c1295dc5f..830f20f90 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -196,7 +196,6 @@ template struct scalar_sin_op; template struct scalar_acos_op; template struct scalar_asin_op; template struct scalar_tan_op; -template struct scalar_pow_op; template struct scalar_inverse_op; template struct scalar_square_op; template struct scalar_cube_op; @@ -209,10 +208,10 @@ template struct scalar_igamma_op; template struct scalar_igammac_op; template struct scalar_betainc_op; +template struct scalar_pow_op; template struct scalar_hypot_op; template struct scalar_product_op; template struct scalar_quotient_op; -template struct scalar_binary_pow_op; } // end namespace internal diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 8bf4e9b18..1e20e35d7 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -82,7 +82,26 @@ max * Example: \include Cwise_array_power_array.cpp * Output: \verbinclude Cwise_array_power_array.out */ -EIGEN_MAKE_CWISE_BINARY_OP(pow,binary_pow) +EIGEN_MAKE_CWISE_BINARY_OP(pow,pow) + +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow); +#else +/** \returns an expression of the coefficients of \c *this rasied to the constant power \a exponent + * + * \tparam T is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression. + * + * This function computes the coefficient-wise power. The function MatrixBase::pow() in the + * unsupported module MatrixFunctions computes the matrix power. + * + * Example: \include Cwise_pow.cpp + * Output: \verbinclude Cwise_pow.out + * + * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log() + */ +template +const CwiseBinaryOp,Derived,Constant > pow(const T& exponent) const; +#endif // TODO code generating macros could be moved to Macros.h and could include generation of documentation diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 775fa6ee0..4a6361d8a 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -26,7 +26,6 @@ typedef CwiseUnaryOp, const Derived> LgammaRe typedef CwiseUnaryOp, const Derived> DigammaReturnType; typedef CwiseUnaryOp, const Derived> ErfReturnType; typedef CwiseUnaryOp, const Derived> ErfcReturnType; -typedef CwiseUnaryOp, const Derived> PowReturnType; typedef CwiseUnaryOp, const Derived> SquareReturnType; typedef CwiseUnaryOp, const Derived> CubeReturnType; typedef CwiseUnaryOp, const Derived> RoundReturnType; @@ -388,24 +387,6 @@ erfc() const return ErfcReturnType(derived()); } -/** \returns an expression of the coefficient-wise power of *this to the given exponent. - * - * This function computes the coefficient-wise power. The function MatrixBase::pow() in the - * unsupported module MatrixFunctions computes the matrix power. - * - * Example: \include Cwise_pow.cpp - * Output: \verbinclude Cwise_pow.out - * - * \sa exp(), log() - */ -EIGEN_DEVICE_FUNC -inline const PowReturnType -pow(const Scalar& exponent) const -{ - return PowReturnType(derived(), internal::scalar_pow_op(exponent)); -} - - /** \returns an expression of the coefficient-wise inverse of *this. 
* * Example: \include Cwise_inverse.cpp diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index cf2207114..b38271a17 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -23,10 +23,18 @@ #endif +static bool g_called; +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } + #include "main.h" using namespace std; +#define VERIFY_MIX_SCALAR(XPR,REF) \ + g_called = false; \ + VERIFY_IS_APPROX(XPR,REF); \ + VERIFY( g_called && #XPR" not properly optimized"); + template void mixingtypes(int size = SizeAtCompileType) { typedef std::complex CF; @@ -66,26 +74,34 @@ template void mixingtypes(int size = SizeAtCompileType) #endif // check scalar products - VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); - VERIFY_IS_APPROX(sd * vcd , complex(sd) * vcd); - VERIFY_IS_APPROX(vf * scf , vf.template cast >() * scf); - VERIFY_IS_APPROX(scd * vd , scd * vd.template cast >()); + VERIFY_MIX_SCALAR(vcf * sf , vcf * complex(sf)); + VERIFY_MIX_SCALAR(sd * vcd , complex(sd) * vcd); + VERIFY_MIX_SCALAR(vf * scf , vf.template cast >() * scf); + VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast >()); // check scalar quotients - VERIFY_IS_APPROX(vcf / sf , vcf / complex(sf)); - VERIFY_IS_APPROX(vf / scf , vf.template cast >() / scf); + VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); + VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); // check scalar increment - VERIFY_IS_APPROX(vcf.array() + sf , vcf.array() + complex(sf)); - VERIFY_IS_APPROX(sd + vcd.array(), complex(sd) + vcd.array()); - VERIFY_IS_APPROX(vf.array() + scf, vf.template cast >().array() + scf); - VERIFY_IS_APPROX(scd + vd.array() , scd + vd.template cast >().array()); + VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex(sf)); + VERIFY_MIX_SCALAR(sd + vcd.array(), complex(sd) + vcd.array()); + VERIFY_MIX_SCALAR(vf.array() + scf, vf.template cast >().array() + scf); + VERIFY_MIX_SCALAR(scd + vd.array() , scd + vd.template cast >().array()); // check scalar subtractions - VERIFY_IS_APPROX(vcf.array() - sf , vcf.array() - complex(sf)); - VERIFY_IS_APPROX(sd - vcd.array(), complex(sd) - vcd.array()); - VERIFY_IS_APPROX(vf.array() - scf, vf.template cast >().array() - scf); - VERIFY_IS_APPROX(scd - vd.array() , scd - vd.template cast >().array()); + VERIFY_MIX_SCALAR(vcf.array() - sf , vcf.array() - complex(sf)); + VERIFY_MIX_SCALAR(sd - vcd.array(), complex(sd) - vcd.array()); + VERIFY_MIX_SCALAR(vf.array() - scf, vf.template cast >().array() - scf); + VERIFY_MIX_SCALAR(scd - vd.array() , scd - vd.template cast >().array()); + + // check scalar powers + VERIFY_MIX_SCALAR( pow(vcf.array(), sf), pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( vcf.array().pow(sf) , pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( pow(sd, vcd.array()), pow(complex(sd), vcd.array()) ); + VERIFY_MIX_SCALAR( pow(vf.array(), scf), pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( vf.array().pow(scf) , pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( pow(scd, vd.array()), pow(scd, vd.template cast >().array()) ); // check dot product vf.dot(vf); @@ -215,6 +231,9 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast().eval().array()) ); + VERIFY_IS_APPROX( pow(md.array(),mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( pow(mcd.array(),md.array()), 
mcd.array().pow(md.template cast().eval().array()) ); + rcd = mcd; VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); rcd = mcd; From 5d3820373513c950fbd238b0d8fd73bf3cd3e7fb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 15:06:03 +0200 Subject: [PATCH 63/85] Update Tensor module to use bind1st_op and bind2nd_op --- .../Eigen/CXX11/src/Tensor/TensorBase.h | 74 +++++++++---------- 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 8f3580ba7..aeaa9dea9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -204,64 +204,62 @@ class TensorBase } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> pow(Scalar exponent) const { - return unaryExpr(internal::scalar_pow_op(exponent)); + return unaryExpr(internal::bind2nd_op >(exponent)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator+ (Scalar rhs) const { - return unaryExpr(internal::scalar_add_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, const Derived> + const TensorCwiseUnaryOp >, const Derived> operator+ (Scalar lhs, const Derived& rhs) { - return rhs + lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator- (Scalar rhs) const { EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return unaryExpr(internal::scalar_sub_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, - const TensorCwiseUnaryOp, const Derived> > + const TensorCwiseUnaryOp >, const Derived> operator- (Scalar lhs, const Derived& rhs) { - return -rhs + lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator* (Scalar rhs) const { - return unaryExpr(internal::scalar_multiple_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, const Derived> + const TensorCwiseUnaryOp >, const Derived> operator* (Scalar lhs, const Derived& rhs) { - return rhs * lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator/ (Scalar rhs) const { - return unaryExpr(internal::scalar_quotient1_op(rhs)); + return unaryExpr(internal::bind2nd_op >(rhs)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend - const TensorCwiseUnaryOp, - const TensorCwiseUnaryOp, const Derived> > + const TensorCwiseUnaryOp >, const Derived> operator/ (Scalar lhs, const Derived& rhs) { - return rhs.inverse() * lhs; + return rhs.unaryExpr(internal::bind1st_op >(lhs)); } EIGEN_DEVICE_FUNC @@ -371,66 +369,66 @@ class TensorBase // Comparisons and tests. 
template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator<=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator>=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator==(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TensorCwiseBinaryOp, const Derived, const OtherDerived> + const TensorCwiseBinaryOp, const Derived, const OtherDerived> operator!=(const OtherDerived& other) const { - return binaryExpr(other.derived(), internal::scalar_cmp_op()); + return binaryExpr(other.derived(), internal::scalar_cmp_op()); } // comparisons and tests for Scalars EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<(Scalar threshold) const { return operator<(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator<=(Scalar threshold) const { return operator<=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>(Scalar threshold) const { return operator>(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator>=(Scalar threshold) const { return operator>=(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + 
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator==(Scalar threshold) const { return operator==(constant(threshold)); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > operator!=(Scalar threshold) const { return operator!=(constant(threshold)); } From 62134082aa0dd33cb7328f9f9a86491d21a52444 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 15:06:35 +0200 Subject: [PATCH 64/85] Update AutoDiffScalar wrt to scalar-multiple. --- Eigen/src/Core/util/Macros.h | 2 + .../Eigen/src/AutoDiff/AutoDiffScalar.h | 89 +++++++++---------- 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index f3c6512c9..031e0892e 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -462,6 +462,8 @@ #define EIGEN_CAT2(a,b) a ## b #define EIGEN_CAT(a,b) EIGEN_CAT2(a,b) +#define EIGEN_COMMA , + // convert a token to a string #define EIGEN_MAKESTRING2(a) #a #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 0ed91fdb7..feaeeaf5a 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -30,6 +30,13 @@ template struct auto_diff_special_op; } // end namespace internal +template class AutoDiffScalar; + +template +inline AutoDiffScalar MakeAutoDiffScalar(const typename NewDerType::Scalar& value, const NewDerType &der) { + return AutoDiffScalar(value,der); +} + /** \class AutoDiffScalar * \brief A scalar type replacement with automatic differentation capability * @@ -257,20 +264,16 @@ class AutoDiffScalar -m_derivatives); } - inline const AutoDiffScalar, const DerType> > + inline const AutoDiffScalar operator*(const Scalar& other) const { - return AutoDiffScalar, const DerType> >( - m_value * other, - (m_derivatives * other)); + return MakeAutoDiffScalar(m_value * other, m_derivatives * other); } - friend inline const AutoDiffScalar, const DerType> > + friend inline const AutoDiffScalar operator*(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, const DerType> >( - a.value() * other, - a.derivatives() * other); + return MakeAutoDiffScalar(a.value() * other, a.derivatives() * other); } // inline const AutoDiffScalar, DerType>::Type > @@ -289,20 +292,16 @@ class AutoDiffScalar // a.derivatives() * other); // } - inline const AutoDiffScalar, const DerType> > + inline const AutoDiffScalar operator/(const Scalar& other) const { - return AutoDiffScalar, const DerType> >( - m_value / other, - (m_derivatives * (Scalar(1)/other))); + return MakeAutoDiffScalar(m_value / other, (m_derivatives * (Scalar(1)/other))); } - friend inline const AutoDiffScalar, const DerType> > + friend inline const AutoDiffScalar operator/(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, const DerType> >( - other / a.value(), - a.derivatives() * (Scalar(-other) / (a.value()*a.value()))); + return MakeAutoDiffScalar(other / a.value(), a.derivatives() * (Scalar(-other) / (a.value()*a.value()))); } // inline const AutoDiffScalar, DerType>::Type > @@ -322,34 +321,29 @@ class AutoDiffScalar // } template - inline const AutoDiffScalar, - const CwiseBinaryOp, - const CwiseUnaryOp, 
const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > > > + inline const AutoDiffScalar EIGEN_COMMA + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) EIGEN_COMMA + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,Scalar,product) >,Scalar,product) > operator/(const AutoDiffScalar& other) const { internal::make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - const CwiseBinaryOp, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > > >( + return MakeAutoDiffScalar( m_value / other.value(), - ((m_derivatives * other.value()) - (m_value * other.derivatives())) + ((m_derivatives * other.value()) - (other.derivatives() * m_value)) * (Scalar(1)/(other.value()*other.value()))); } template inline const AutoDiffScalar, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type> > > + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product), + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,Scalar,product) > > operator*(const AutoDiffScalar& other) const { internal::make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - const CwiseUnaryOp, const DerType>, - const CwiseUnaryOp, const typename internal::remove_all::type > > >( + return MakeAutoDiffScalar( m_value * other.value(), - (m_derivatives * other.value()) + (m_value * other.derivatives())); + (m_derivatives * other.value()) + (other.derivatives() * m_value)); } inline AutoDiffScalar& operator*=(const Scalar& other) @@ -533,11 +527,11 @@ struct ScalarBinaryOpTraits > #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \ template \ - inline const Eigen::AutoDiffScalar::type>::Scalar>, const typename Eigen::internal::remove_all::type> > \ + inline const Eigen::AutoDiffScalar< \ + EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename Eigen::internal::remove_all::type, typename Eigen::internal::traits::type>::Scalar, product) > \ FUNC(const Eigen::AutoDiffScalar& x) { \ using namespace Eigen; \ typedef typename Eigen::internal::traits::type>::Scalar Scalar; \ - typedef AutoDiffScalar, const typename Eigen::internal::remove_all::type> > ReturnType; \ CODE; \ } @@ -570,46 +564,45 @@ inline AutoDiffScalar::type::Plain EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs, using std::abs; - return ReturnType(abs(x.value()), x.derivatives() * (x.value()<0 ? -1 : 1) );) + return Eigen::MakeAutoDiffScalar(abs(x.value()), x.derivatives() * (x.value()<0 ? 
-1 : 1) );) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs2, using numext::abs2; - return ReturnType(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));) + return Eigen::MakeAutoDiffScalar(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sqrt, using std::sqrt; Scalar sqrtx = sqrt(x.value()); - return ReturnType(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) + return Eigen::MakeAutoDiffScalar(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cos, using std::cos; using std::sin; - return ReturnType(cos(x.value()), x.derivatives() * (-sin(x.value())));) + return Eigen::MakeAutoDiffScalar(cos(x.value()), x.derivatives() * (-sin(x.value())));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sin, using std::sin; using std::cos; - return ReturnType(sin(x.value()),x.derivatives() * cos(x.value()));) + return Eigen::MakeAutoDiffScalar(sin(x.value()),x.derivatives() * cos(x.value()));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(exp, using std::exp; Scalar expx = exp(x.value()); - return ReturnType(expx,x.derivatives() * expx);) + return Eigen::MakeAutoDiffScalar(expx,x.derivatives() * expx);) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(log, using std::log; - return ReturnType(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) + return Eigen::MakeAutoDiffScalar(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));) template -inline const Eigen::AutoDiffScalar::type>::Scalar>, const typename internal::remove_all::type> > -pow(const Eigen::AutoDiffScalar& x, const typename internal::traits::type>::Scalar &y) +inline const Eigen::AutoDiffScalar< +EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all::type,typename internal::traits::type>::Scalar,product) > +pow(const Eigen::AutoDiffScalar &x, const typename internal::traits::type>::Scalar &y) { using namespace Eigen; typedef typename internal::remove_all::type DerTypeCleaned; typedef typename Eigen::internal::traits::Scalar Scalar; - return AutoDiffScalar, const DerTypeCleaned> >( - std::pow(x.value(),y), - x.derivatives() * (y * std::pow(x.value(),y-1))); + return Eigen::MakeAutoDiffScalar(std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); } @@ -634,17 +627,17 @@ atan2(const AutoDiffScalar& a, const AutoDiffScalar& b) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(tan, using std::tan; using std::cos; - return ReturnType(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));) + return Eigen::MakeAutoDiffScalar(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(asin, using std::sqrt; using std::asin; - return ReturnType(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));) + return Eigen::MakeAutoDiffScalar(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));) EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(acos, using std::sqrt; using std::acos; - return ReturnType(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));) + return Eigen::MakeAutoDiffScalar(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));) #undef EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY From 70dad84b737374e97319733ebff6f36e86ac384d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 15:26:37 +0200 Subject: [PATCH 65/85] Generalize expr/expr and scalar/expr wrt scalar types. 
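
A minimal usage sketch (illustrative only, not part of the patch) of what this generalization is expected to allow, based on the checks added to test/mixingtypes.cpp below: coefficient-wise quotients mixing real and complex scalar types compile directly instead of requiring an explicit cast.

    #include <Eigen/Dense>
    #include <complex>
    using namespace Eigen;

    int main() {
      ArrayXd  d  = ArrayXd::Random(5);
      ArrayXcd cd = ArrayXcd::Random(5);
      std::complex<double> s(1.0, 2.0);

      ArrayXcd r1 = d / cd;  // expr/expr with mixed real/complex scalars
      ArrayXcd r2 = s / d;   // scalar/expr with a complex scalar and a real array
      // Both are expected to match the explicitly casted forms,
      // e.g. d.cast<std::complex<double> >() / cd.
      return 0;
    }
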
--- Eigen/src/Core/GlobalFunctions.h | 34 +++++++++++++++++++------ Eigen/src/plugins/ArrayCwiseBinaryOps.h | 4 +-- test/mixingtypes.cpp | 4 ++- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 60e2ccfed..5ffa6c694 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -175,18 +175,36 @@ namespace Eigen #endif /** - * \brief Component-wise division of a scalar by array elements. + * \brief Component-wise division of the scalar \a s by array elements of \a a. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). + * + * \relates ArrayBase **/ - template - inline const Eigen::CwiseUnaryOp, const Derived> - operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Constant,Derived> + operator/(const Scalar& s,const Eigen::ArrayBase& a); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) + && ScalarBinaryOpTraits::Defined, + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient) >::type + operator/(const Scalar& s, const Eigen::ArrayBase& a) { - return Eigen::CwiseUnaryOp, const Derived>( - a.derived(), - Eigen::internal::scalar_inverse_mult_op(s) - ); + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient)( + typename internal::plain_constant_type::type(a.rows(), a.cols(), s), a.derived() ); } + template + inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,quotient) + operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,quotient)( + typename internal::plain_constant_type::type(a.rows(), a.cols(), s), a.derived() ); + } +#endif + /** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. * * This function computes the coefficient-wise incomplete gamma function. 
diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 1e20e35d7..0c1429c75 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -17,10 +17,10 @@ operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const */ template EIGEN_DEVICE_FUNC -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const { - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); + return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); } /** \returns an expression of the coefficient-wise min of \c *this and \a other diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index b38271a17..fe8c16470 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -82,6 +82,8 @@ template void mixingtypes(int size = SizeAtCompileType) // check scalar quotients VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); + VERIFY_MIX_SCALAR(vf.array() / scf, vf.template cast >().array() / scf); + VERIFY_MIX_SCALAR(scd / vd.array() , scd / vd.template cast >().array()); // check scalar increment VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex(sf)); @@ -225,7 +227,7 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast().eval().array() - mcd.array() ); VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast().eval().array() ); -// VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); + VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); From 1004c4df99a3e4a019f05b83badb06f4e2df5ee6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 14 Jun 2016 15:27:28 +0200 Subject: [PATCH 66/85] Cleanup unused functors. --- Eigen/src/Core/functors/BinaryFunctors.h | 113 ----------------------- Eigen/src/Core/util/BlasUtil.h | 13 --- 2 files changed, 126 deletions(-) diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index 9e40303be..2c1331208 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -525,119 +525,6 @@ template struct bind2nd_op : BinaryOp { }; template struct functor_traits > : functor_traits {}; -/** \internal - * \brief Template functor to multiply a scalar by a fixed other one - * - * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ - */ -/* NOTE why doing the pset1() in packetOp *is* an optimization ? - * indeed it seems better to declare m_other as a Packet and do the pset1() once - * in the constructor. 
However, in practice: - * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() outside the loop :) - * - simpler code ;) - * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) - */ -template -struct scalar_multiple_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - - -/** \internal - * \brief Template functor to divide a scalar by a fixed other one - * - * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not necessarily a floating point type. - * - * \sa class CwiseUnaryOp, MatrixBase::operator/ - */ -template -struct scalar_quotient1_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pdiv(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - - - -/** \internal - * \brief Template functor to add a scalar to a fixed other one - * \sa class CwiseUnaryOp, Array::operator+ - */ -/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ -template -struct scalar_add_op { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::padd(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to subtract a fixed scalar to another one - * \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op - */ -template -struct scalar_sub_op { - EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { } - EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { } - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; } - template - EIGEN_DEVICE_FUNC inline const 
Packet packetOp(const Packet& a) const - { return internal::psub(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - - -/** \internal - * \brief Template functor to compute the quotient between a scalar and array entries. - * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_mult_op { - EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} - EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; } - template - EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(m_other),a); } - Scalar m_other; -}; -template -struct functor_traits > -{ enum { PacketAccess = packet_traits::HasDiv, Cost = NumTraits::template Div::Cost }; }; - } // end namespace internal diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 7e8f90d88..a85ad558f 100755 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -316,19 +316,6 @@ struct blas_traits, NestedXpr, const Cwi { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; } }; -// pop scalar multiple (using deprecated scalar_multiple_op) -template -struct blas_traits, NestedXpr> > - : blas_traits -{ - typedef blas_traits Base; - typedef CwiseUnaryOp, NestedXpr> XprType; - typedef typename Base::ExtractType ExtractType; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } - static inline Scalar extractScalarFactor(const XprType& x) - { return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); } -}; - // pop opposite template struct blas_traits, NestedXpr> > From c4d10e921f02409d771e960647beae0b6d26dc88 Mon Sep 17 00:00:00 2001 From: Igor Babuschkin Date: Tue, 14 Jun 2016 19:44:07 +0100 Subject: [PATCH 67/85] Implement exclusive scan option --- .../Eigen/CXX11/src/Tensor/TensorBase.h | 12 +++++----- .../Eigen/CXX11/src/Tensor/TensorScan.h | 22 ++++++++++++----- unsupported/test/cxx11_tensor_scan.cpp | 24 +++++++++++++++++++ 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 8f3580ba7..87fa672f4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -486,22 +486,22 @@ class TensorBase typedef TensorScanOp, const Derived> TensorScanSumOp; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorScanSumOp - cumsum(const Index& axis) const { - return TensorScanSumOp(derived(), axis); + cumsum(const Index& axis, bool exclusive = false) const { + return TensorScanSumOp(derived(), axis, exclusive); } typedef TensorScanOp, const Derived> TensorScanProdOp; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorScanProdOp - cumprod(const Index& axis) const { - return TensorScanProdOp(derived(), axis); + cumprod(const Index& axis, bool exclusive = false) const { + return TensorScanProdOp(derived(), axis, exclusive); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorScanOp - scan(const Index& axis, const Reducer& reducer) const { - return TensorScanOp(derived(), axis, reducer); + scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const { + return TensorScanOp(derived(), axis, exclusive, reducer); } // Reductions. 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h index 5207f6a8d..1aa196b84 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h @@ -57,8 +57,8 @@ public: typedef typename Eigen::internal::traits::Index Index; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp( - const XprType& expr, const Index& axis, const Op& op = Op()) - : m_expr(expr), m_axis(axis), m_accumulator(op) {} + const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op()) + : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index axis() const { return m_axis; } @@ -66,11 +66,14 @@ public: const XprType& expression() const { return m_expr; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op accumulator() const { return m_accumulator; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + bool exclusive() const { return m_exclusive; } protected: typename XprType::Nested m_expr; const Index m_axis; const Op m_accumulator; + const bool m_exclusive; }; // Eval as rvalue @@ -99,6 +102,7 @@ struct TensorEvaluator, Device> { : m_impl(op.expression(), device), m_device(device), m_axis(op.axis()), + m_exclusive(op.exclusive()), m_accumulator(op.accumulator()), m_dimensions(m_impl.dimensions()), m_size(m_dimensions[m_axis]), @@ -168,6 +172,7 @@ protected: TensorEvaluator m_impl; const Device& m_device; const Index m_axis; + const bool m_exclusive; Op m_accumulator; const Dimensions& m_dimensions; const Index& m_size; @@ -176,7 +181,7 @@ protected: // TODO(ibab) Parallelize this single-threaded implementation if desired EIGEN_DEVICE_FUNC void accumulateTo(Scalar* data) { - // We fix the index along the scan axis to 0 and perform an + // We fix the index along the scan axis to 0 and perform a // scan per remaining entry. The iteration is split into two nested // loops to avoid an integer division by keeping track of each idx1 and idx2. 
for (Index idx1 = 0; idx1 < dimensions().TotalSize() / m_size; idx1 += m_stride) { @@ -184,12 +189,17 @@ protected: // Calculate the starting offset for the scan Index offset = idx1 * m_size + idx2; - // Compute the prefix sum along the axis, starting at the calculated offset + // Compute the scan along the axis, starting at the calculated offset CoeffReturnType accum = m_accumulator.initialize(); for (Index idx3 = 0; idx3 < m_size; idx3++) { Index curr = offset + idx3 * m_stride; - m_accumulator.reduce(m_impl.coeff(curr), &accum); - data[curr] = m_accumulator.finalize(accum); + if (m_exclusive) { + data[curr] = m_accumulator.finalize(accum); + m_accumulator.reduce(m_impl.coeff(curr), &accum); + } else { + m_accumulator.reduce(m_impl.coeff(curr), &accum); + data[curr] = m_accumulator.finalize(accum); + } } } } diff --git a/unsupported/test/cxx11_tensor_scan.cpp b/unsupported/test/cxx11_tensor_scan.cpp index dbd3023d7..bafa6c96e 100644 --- a/unsupported/test/cxx11_tensor_scan.cpp +++ b/unsupported/test/cxx11_tensor_scan.cpp @@ -38,6 +38,30 @@ static void test_1d_scan() } } +template +static void test_1d_inclusive_scan() +{ + int size = 50; + Tensor tensor(size); + tensor.setRandom(); + Tensor result = tensor.cumsum(0, true); + + VERIFY_IS_EQUAL(tensor.dimension(0), result.dimension(0)); + + float accum = 0; + for (int i = 0; i < size; i++) { + VERIFY_IS_EQUAL(result(i), accum); + accum += tensor(i); + } + + accum = 1; + result = tensor.cumprod(0, true); + for (int i = 0; i < size; i++) { + VERIFY_IS_EQUAL(result(i), accum); + accum *= tensor(i); + } +} + template static void test_4d_scan() { From 101ea26f5e18919972b321b5f7e3ef4e07be3fd6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 00:01:16 +0200 Subject: [PATCH 68/85] Include the cost of stores in unrolling (also fix infinite unrolling with expression costing 0 like Constant) --- Eigen/src/Core/AssignEvaluator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f966724cc..1df717bac 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -116,9 +116,9 @@ private: : 1, UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic - && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit), + && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit), MayUnrollInner = int(InnerSize) != Dynamic - && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit) + && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) }; public: From 4e7c3af874e9e1273b5bf3acdc4b53e8c6bdf086 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 00:04:10 +0200 Subject: [PATCH 69/85] Cleanup useless helper: internal::product_result_scalar --- Eigen/src/Core/Product.h | 36 +----------------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index bad289761..ae0c94b38 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -16,40 +16,6 @@ template class Pro namespace internal { -// Determine the scalar of Product. This is normally the same as Lhs::Scalar times -// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor. 
-// TODO: this could be removed once ScalarBinaryOpTraits handles void. -template::Shape, - typename RhsShape = typename evaluator_traits::Shape > -struct product_result_scalar -{ - typedef typename ScalarBinaryOpTraits::ReturnType Scalar; -}; - -template -struct product_result_scalar -{ - typedef typename Rhs::Scalar Scalar; -}; - -template - struct product_result_scalar -{ - typedef typename Lhs::Scalar Scalar; -}; - -template -struct product_result_scalar -{ - typedef typename Rhs::Scalar Scalar; -}; - -template - struct product_result_scalar -{ - typedef typename Lhs::Scalar Scalar; -}; - template struct traits > { @@ -60,7 +26,7 @@ struct traits > typedef MatrixXpr XprKind; - typedef typename product_result_scalar::Scalar Scalar; + typedef typename ScalarBinaryOpTraits::Scalar, typename traits::Scalar>::ReturnType Scalar; typedef typename product_promote_storage_type::ret>::ret StorageKind; From aedc5be1d6b6b42f5037c4005b437f3e552ac101 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 14 Jun 2016 17:51:47 -0700 Subject: [PATCH 70/85] Avoid generating pseudo random numbers that are multiples of 5: this helps spread the load over multiple cpus without having to rely on work stealing. --- .../src/ThreadPool/NonBlockingThreadPool.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index c094563b7..30b292352 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -99,10 +99,12 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { typedef typename Environment::EnvThread Thread; struct PerThread { - bool inited; + PerThread() : pool(NULL), index(-1) { + rand = std::hash()(std::this_thread::get_id()); + } NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. unsigned index; // Worker thread index in pool. - unsigned rand; // Random generator state. + uint64_t rand; // Random generator state. }; Environment env_; @@ -235,17 +237,18 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { return -1; } - PerThread* GetPerThread() { + static EIGEN_STRONG_INLINE PerThread* GetPerThread() { EIGEN_THREAD_LOCAL PerThread per_thread_; PerThread* pt = &per_thread_; - if (pt->inited) return pt; - pt->inited = true; - pt->rand = static_cast(std::hash()(std::this_thread::get_id())); return pt; } - static unsigned Rand(unsigned* state) { - return *state = *state * 1103515245 + 12345; + static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) { + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return (current ^ (current >> 22)) >> (22 + (current >> 61)); } }; From c55035b9c0c894551850d122ac8b0cf1a053c28e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 09:57:33 +0200 Subject: [PATCH 71/85] Include the cost of stores in unrolling of triangular expressions.
--- Eigen/src/Core/TriangularMatrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 8731e9127..c599e0b32 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -794,7 +794,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr enum { unroll = DstXprType::SizeAtCompileTime != Dynamic && SrcEvaluatorType::CoeffReadCost < HugeCost - && DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT + && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT }; triangular_assignment_loop::run(kernel); From 4794834397aa0e2a6570a2f736b65b72b7db1aad Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 09:58:49 +0200 Subject: [PATCH 72/85] Propagate functor to ScalarBinaryOpTraits --- Eigen/src/Core/GlobalFunctions.h | 6 +++--- Eigen/src/Core/util/Macros.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 5ffa6c694..0361b40ad 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -103,7 +103,7 @@ namespace Eigen #else template inline typename internal::enable_if< !(internal::is_same::value) - && ScalarBinaryOpTraits::Defined, + && ScalarBinaryOpTraits >::Defined, const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) { return x.derived().pow(exponent); @@ -157,7 +157,7 @@ namespace Eigen #else template inline typename internal::enable_if< !(internal::is_same::value) - && ScalarBinaryOpTraits::Defined, + && ScalarBinaryOpTraits >::Defined, const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type pow(const Scalar& x, const Eigen::ArrayBase& exponents) { @@ -188,7 +188,7 @@ namespace Eigen #else template inline typename internal::enable_if< !(internal::is_same::value) - && ScalarBinaryOpTraits::Defined, + && ScalarBinaryOpTraits >::Defined, const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient) >::type operator/(const Scalar& s, const Eigen::ArrayBase& a) { diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 031e0892e..87cc44657 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -914,7 +914,7 @@ namespace Eigen { } \ \ template EIGEN_DEVICE_FUNC inline \ - typename internal::enable_if::Defined, \ + typename internal::enable_if >::Defined, \ const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME) >::type \ (METHOD)(const T& scalar) const { \ return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME)(derived(), \ @@ -930,7 +930,7 @@ namespace Eigen { } \ \ template EIGEN_DEVICE_FUNC inline friend \ - typename internal::enable_if::Defined, \ + typename internal::enable_if >::Defined, \ const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME) >::type \ (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME)( \ From eb91345d641cc1aa7a62af1bf6db3e38a6d7225a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 15:22:03 +0200 Subject: [PATCH 73/85] Move scalar/expr to ArrayBase and fix documentation --- Eigen/src/Core/GlobalFunctions.h | 31 ------------------------ Eigen/src/plugins/ArrayCwiseBinaryOps.h | 24 ++++++++++++++---- 
Eigen/src/plugins/CommonCwiseBinaryOps.h | 6 ++--- 3 files changed, 22 insertions(+), 39 deletions(-) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 0361b40ad..b9c3ec25b 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -173,37 +173,6 @@ namespace Eigen typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); } #endif - - /** - * \brief Component-wise division of the scalar \a s by array elements of \a a. - * - * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). - * - * \relates ArrayBase - **/ -#ifdef EIGEN_PARSED_BY_DOXYGEN - template - inline const CwiseBinaryOp,Constant,Derived> - operator/(const Scalar& s,const Eigen::ArrayBase& a); -#else - template - inline typename internal::enable_if< !(internal::is_same::value) - && ScalarBinaryOpTraits >::Defined, - const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient) >::type - operator/(const Scalar& s, const Eigen::ArrayBase& a) - { - return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,quotient)( - typename internal::plain_constant_type::type(a.rows(), a.cols(), s), a.derived() ); - } - - template - inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,quotient) - operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) - { - return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,quotient)( - typename internal::plain_constant_type::type(a.rows(), a.cols(), s), a.derived() ); - } -#endif /** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays. * diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 0c1429c75..6cb609604 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -100,7 +100,7 @@ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow); * \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log() */ template -const CwiseBinaryOp,Derived,Constant > pow(const T& exponent) const; +const CwiseBinaryOp,Derived,Constant > pow(const T& exponent) const; #endif @@ -224,13 +224,13 @@ EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum); * \sa operator+=(), operator-() */ template -const CwiseBinaryOp,Derived,Constant > operator+(const T& scalar) const; +const CwiseBinaryOp,Derived,Constant > operator+(const T& scalar) const; /** \returns an expression of \a expr with each coeff incremented by the constant \a scalar * * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. */ template friend -const CwiseBinaryOp,Constant,Derived> operator+(const T& scalar, const StorageBaseType& expr); +const CwiseBinaryOp,Constant,Derived> operator+(const T& scalar, const StorageBaseType& expr); #endif #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -246,13 +246,27 @@ EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference); * \sa operator+=(), operator-() */ template -const CwiseBinaryOp,Derived,Constant > operator-(const T& scalar) const; +const CwiseBinaryOp,Derived,Constant > operator-(const T& scalar) const; /** \returns an expression of the constant matrix of value \a scalar decremented by the coefficients of \a expr * * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
*/ template friend -const CwiseBinaryOp,Constant,Derived> operator-(const T& scalar, const StorageBaseType& expr); +const CwiseBinaryOp,Constant,Derived> operator-(const T& scalar, const StorageBaseType& expr); +#endif + + +#ifndef EIGEN_PARSED_BY_DOXYGEN + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient) +#else + /** + * \brief Component-wise division of the scalar \a s by array elements of \a a. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). + */ + template friend + inline const CwiseBinaryOp,Constant,Derived> + operator/(const T& s,const StorageBaseType& a); #endif /** \returns an expression of the coefficient-wise && operator of *this and \a other diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index 03c4aac94..afac08a3f 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -54,13 +54,13 @@ EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product); * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. */ template -const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; +const CwiseBinaryOp,Derived,Constant > operator*(const T& scalar) const; /** \returns an expression of \a expr scaled by the scalar factor \a scalar * * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. */ template friend -const CwiseBinaryOp,Constant,Derived> operator*(const T& scalar, const StorageBaseType& expr); +const CwiseBinaryOp,Constant,Derived> operator*(const T& scalar, const StorageBaseType& expr); #endif @@ -73,5 +73,5 @@ EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient); * \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression. 
*/ template -const CwiseBinaryOp,Derived,Constant > operator/(const T& scalar) const; +const CwiseBinaryOp,Derived,Constant > operator/(const T& scalar) const; #endif From 67c12531e567629e84713fbb3150560c916bd08c Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 15 Jun 2016 18:11:33 +0200 Subject: [PATCH 74/85] Fix warnings with gcc --- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 6 +++--- Eigen/src/plugins/CommonCwiseBinaryOps.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 6cb609604..19e25ab62 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -85,7 +85,7 @@ max EIGEN_MAKE_CWISE_BINARY_OP(pow,pow) #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow); +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow) #else /** \returns an expression of the coefficients of \c *this rasied to the constant power \a exponent * @@ -212,7 +212,7 @@ EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ) // scalar addition #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum); +EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum) #else /** \returns an expression of \c *this with each coeff incremented by the constant \a scalar * @@ -234,7 +234,7 @@ const CwiseBinaryOp,Constant,Derived> opera #endif #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference); +EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference) #else /** \returns an expression of \c *this with each coeff decremented by the constant \a scalar * diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h index afac08a3f..b51ee9e4c 100644 --- a/Eigen/src/plugins/CommonCwiseBinaryOps.h +++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h @@ -47,7 +47,7 @@ binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other, const Cu #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product); +EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product) #else /** \returns an expression of \c *this scaled by the scalar factor \a scalar * @@ -66,7 +66,7 @@ const CwiseBinaryOp,Constant,Derived> o #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient); +EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient) #else /** \returns an expression of \c *this divided by the scalar value \a scalar * From b055590e9135ffe762775ec919e490513b6974fa Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 16 Jun 2016 11:37:40 -0700 Subject: [PATCH 75/85] Made log1p_impl usable inside a GPU kernel --- Eigen/src/Core/MathFunctions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index ece04b754..342b47ced 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -462,7 +462,7 @@ struct arg_retval template::IsComplex > struct log1p_impl { - static inline Scalar run(const Scalar& x) + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) typedef typename NumTraits::Real RealScalar; @@ -472,7 +472,7 @@ struct log1p_impl } }; -#if EIGEN_HAS_CXX11_MATH +#if EIGEN_HAS_CXX11_MATH && !defined(__CUDACC__) template struct log1p_impl { static inline Scalar run(const Scalar& x) From de32f8d656c3ea7855ced77457ea661e43d417b7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 20 Jun 2016 10:46:45 -0700 Subject: [PATCH 76/85] Fixed the printing 
of rank-0 tensors --- unsupported/Eigen/CXX11/src/Tensor/TensorIO.h | 68 +++++++++++++------ unsupported/test/cxx11_tensor_io.cpp | 16 +++++ 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 38a833f82..3db692ac6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -17,34 +17,58 @@ template<> struct significant_decimals_impl : significant_decimals_default_impl {}; -} -template -std::ostream& operator << (std::ostream& os, const TensorBase& expr) { - // Evaluate the expression if needed - TensorForcedEvalOp eval = expr.eval(); - TensorEvaluator, DefaultDevice> tensor(eval, DefaultDevice()); - tensor.evalSubExprsIfNeeded(NULL); - - typedef typename internal::remove_const::type Scalar; - typedef typename T::Index Index; - typedef typename TensorEvaluator, DefaultDevice>::Dimensions Dimensions; - const Index total_size = internal::array_prod(tensor.dimensions()); - - // Print the tensor as a 1d vector or a 2d matrix. - static const int rank = internal::array_size::value; - if (rank == 0) { - os << tensor.coeff(0); - } else if (rank == 1) { - Map > array(const_cast(tensor.data()), total_size); - os << array; - } else { +// Print the tensor as a 2d matrix +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); - static const int layout = TensorEvaluator, DefaultDevice>::Layout; + static const int layout = Tensor::Layout; Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); os << matrix; } +}; + + +// Print the tensor as a vector +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + Map > array(const_cast(tensor.data()), total_size); + os << array; + } +}; + + +// Print the tensor as a scalar +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + os << tensor.coeff(0); + } +}; +} + +template +std::ostream& operator << (std::ostream& os, const TensorBase& expr) { + typedef TensorEvaluator, DefaultDevice> Evaluator; + typedef typename Evaluator::Dimensions Dimensions; + + // Evaluate the expression if needed + TensorForcedEvalOp eval = expr.eval(); + Evaluator tensor(eval, DefaultDevice()); + tensor.evalSubExprsIfNeeded(NULL); + + // Print the result + static const int rank = internal::array_size::value; + internal::TensorPrinter::run(os, tensor); // Cleanup. 
tensor.cleanup(); diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp index 8bbcf7089..8267dcadd 100644 --- a/unsupported/test/cxx11_tensor_io.cpp +++ b/unsupported/test/cxx11_tensor_io.cpp @@ -13,6 +13,20 @@ #include +template +static void test_output_0d() +{ + Tensor tensor; + tensor() = 123; + + std::stringstream os; + os << tensor; + + std::string expected("123"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + template static void test_output_1d() { @@ -101,6 +115,8 @@ static void test_output_const() void test_cxx11_tensor_io() { + CALL_SUBTEST(test_output_0d()); + CALL_SUBTEST(test_output_0d()); CALL_SUBTEST(test_output_1d()); CALL_SUBTEST(test_output_1d()); CALL_SUBTEST(test_output_2d()); From c58df317473277b297dd018a2dc3d8fe85b00c92 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 21 Jun 2016 09:22:43 -0700 Subject: [PATCH 77/85] Handle empty tensors in the print functions --- unsupported/Eigen/CXX11/src/Tensor/TensorIO.h | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 3db692ac6..58ffaefab 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -26,10 +26,15 @@ struct TensorPrinter { typedef typename internal::remove_const::type Scalar; typedef typename Tensor::Index Index; const Index total_size = internal::array_prod(tensor.dimensions()); - const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); - static const int layout = Tensor::Layout; - Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); - os << matrix; + if (total_size == 0) { + os << "Empty tensor of rank " << Rank; + } + else { + const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); + static const int layout = Tensor::Layout; + Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); + os << matrix; + } } }; @@ -41,8 +46,13 @@ struct TensorPrinter { typedef typename internal::remove_const::type Scalar; typedef typename Tensor::Index Index; const Index total_size = internal::array_prod(tensor.dimensions()); - Map > array(const_cast(tensor.data()), total_size); - os << array; + if (total_size == 0) { + os << "Empty tensor of rank 1"; + } + else { + Map > array(const_cast(tensor.data()), total_size); + os << array; + } } }; From f8fcd6b32d6b1a3613330e553bb3bc52a0007192 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Jun 2016 16:03:11 -0700 Subject: [PATCH 78/85] Turned the constructor of the PerThread struct into what is effectively a constant expression to make the code compatible with a wider range of compilers --- .../Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index 30b292352..8d4973ded 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -99,9 +99,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { typedef typename Environment::EnvThread Thread; struct PerThread { - PerThread() : pool(NULL), index(-1) { - rand = std::hash()(std::this_thread::get_id()); - } + PerThread() : pool(NULL), index(-1), rand(0) { } NonBlockingThreadPoolTempl* pool; // Parent pool, or null 
for normal threads. unsigned index; // Worker thread index in pool. uint64_t rand; // Random generator state. @@ -122,6 +120,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { PerThread* pt = GetPerThread(); pt->pool = this; pt->index = index; + pt->rand = std::hash()(std::this_thread::get_id()); Queue* q = queues_[index]; EventCount::Waiter* waiter = &waiters_[index]; for (;;) { From a29a2cb4ff4eb153d8e725f126f178371cc0468c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 22 Jun 2016 16:43:02 -0700 Subject: [PATCH 79/85] Silenced a couple of compilation warnings generated by xcode --- .../Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index 8d4973ded..c6db01a6b 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -99,7 +99,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { typedef typename Environment::EnvThread Thread; struct PerThread { - PerThread() : pool(NULL), index(-1), rand(0) { } + constexpr PerThread() : pool(NULL), index(-1), rand(0) { } NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads. unsigned index; // Worker thread index in pool. uint64_t rand; // Random generator state. @@ -247,7 +247,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { // Update the internal state *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; // Generate the random output (using the PCG-XSH-RS scheme) - return (current ^ (current >> 22)) >> (22 + (current >> 61)); + return static_cast((current ^ (current >> 22)) >> (22 + (current >> 61))); } }; From a3f7edf7e7672094190e04a0b4417de1abfa3de5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 10:25:04 +0200 Subject: [PATCH 80/85] Bug 1242: fix comma init with empty matrices.
--- Eigen/src/Core/CommaInitializer.h | 7 +++++-- test/commainitializer.cpp | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index 2abc6605c..38b1112ff 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -80,8 +80,11 @@ struct CommaInitializer EIGEN_DEVICE_FUNC CommaInitializer& operator,(const DenseBase& other) { - if(other.cols()==0 || other.rows()==0) + if(other.rows()==0) + { + m_col += other.cols(); return *this; + } if (m_col==m_xpr.cols()) { m_row+=m_currentBlockRows; @@ -90,7 +93,7 @@ struct CommaInitializer eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() && "Too many rows passed to comma initializer (operator<<)"); } - eigen_assert(m_col A1; + Matrix A2; + Matrix B; + B << A1, A2; + } + { + Matrix A1; + Matrix A2; + Matrix B; + B << A1, + A2; + } + } + } From 76faf4a9657efeed089aeedc98a769410c32d3d7 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 14:27:20 +0200 Subject: [PATCH 81/85] Introduce a NumTraits::Literal type to be used for literals, and improve mixing type support in operations between arrays and scalars: - 2 * ArrayXcf is now optimized in the sense that the integer 2 is properly promoted to a float instead of a complex (fix a regression) - 2.1 * ArrayXi is now forbidden (previously, 2.1 was converted to 2) - This mechanism should be applicable to any custom scalar type, assuming NumTraits::Literal is properly defined (it defaults to T) --- Eigen/src/Core/NumTraits.h | 11 ++++++++--- Eigen/src/Core/util/Macros.h | 30 ++++++++---------------------- Eigen/src/Core/util/XprHelper.h | 28 ++++++++++++++++++++++++++++ test/mixingtypes.cpp | 5 +++++ test/nesting_ops.cpp | 4 ++-- 5 files changed, 51 insertions(+), 27 deletions(-) diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index e065fa714..03f64a8e9 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -22,14 +22,16 @@ namespace Eigen { * This class stores enums, typedefs and static methods giving information about a numeric type. * * The provided data consists of: - * \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real, - * then \a Real is just a typedef to \a T. If \a T is \c std::complex then \a Real + * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, + * then \c Real is just a typedef to \a T. If \a T is \c std::complex then \c Real * is a typedef to \a U. - * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values, + * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex, Literal is defined as \c U. + * Of course, this type must be fully compatible with \a T. In doubt, just use \a T here. * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree.
If you don't know what * this means, just use \a T here. * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex @@ -84,6 +86,7 @@ template struct GenericNumTraits T >::type NonInteger; typedef T Nested; + typedef T Literal; EIGEN_DEVICE_FUNC static inline Real epsilon() @@ -145,6 +148,7 @@ template struct NumTraits > : GenericNumTraits > { typedef _Real Real; + typedef typename NumTraits<_Real>::Literal Literal; enum { IsComplex = 1, RequireInitialization = NumTraits<_Real>::RequireInitialization, @@ -168,6 +172,7 @@ struct NumTraits > typedef typename NumTraits::NonInteger NonIntegerScalar; typedef Array NonInteger; typedef ArrayType & Nested; + typedef typename NumTraits::Literal Literal; enum { IsComplex = NumTraits::IsComplex, diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 87cc44657..6de21d2bb 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -906,35 +906,21 @@ namespace Eigen { const typename internal::plain_constant_type::type, const EXPR> #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ - EIGEN_DEVICE_FUNC inline \ - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,OPNAME) \ - (METHOD)(const Scalar& scalar) const { \ - return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,OPNAME)(derived(), \ - typename internal::plain_constant_type::type(derived().rows(), derived().cols(), scalar)); \ - } \ - \ template EIGEN_DEVICE_FUNC inline \ - typename internal::enable_if >::Defined, \ - const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME) >::type \ + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg >::Defined>::type,OPNAME) \ (METHOD)(const T& scalar) const { \ - return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,T,OPNAME)(derived(), \ - typename internal::plain_constant_type::type(derived().rows(), derived().cols(), scalar)); \ + typedef typename internal::promote_scalar_arg >::Defined>::type PromotedT; \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), internal::scalar_constant_op(scalar))); \ } #define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ - EIGEN_DEVICE_FUNC inline friend \ - const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,OPNAME) \ - (METHOD)(const Scalar& scalar, const StorageBaseType& matrix) { \ - return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,OPNAME)( \ - typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), scalar), matrix.derived()); \ - } \ - \ template EIGEN_DEVICE_FUNC inline friend \ - typename internal::enable_if >::Defined, \ - const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME) >::type \ + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg >::Defined>::type,Derived,OPNAME) \ (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ - return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(T,Derived,OPNAME)( \ - typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), scalar), matrix.derived()); \ + typedef typename internal::promote_scalar_arg >::Defined>::type PromotedT; \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op(scalar)), matrix.derived()); \ } #define 
EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \ diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index c41c408b0..b372ac1ad 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -45,6 +45,34 @@ inline IndexDest convert_index(const IndexSrc& idx) { } +// promote_scalar_arg is an helper used in operation between an expression and a scalar, like: +// expression * scalar +// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression. +// The IsSupported template parameter must be provided by the caller as: ScalarBinaryOpTraits::Defined using the proper order for ExprScalar and T. +// Then the logic is as follows: +// - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned. +// - otherwise, NumTraits::Literal is returned if T is implicitly convertible to NumTraits::Literal AND that this does not imply a float to integer conversion. +// - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE). +template::Literal>::value, + bool IsSafe = NumTraits::IsInteger || !NumTraits::Literal>::IsInteger> +struct promote_scalar_arg +{ +}; + +template +struct promote_scalar_arg +{ + typedef T type; +}; + +template +struct promote_scalar_arg +{ + typedef typename NumTraits::Literal type; +}; + //classes inheriting no_assignment_operator don't generate a default operator=. class no_assignment_operator { diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index fe8c16470..57ef85c32 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -79,6 +79,11 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_MIX_SCALAR(vf * scf , vf.template cast >() * scf); VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast >()); + VERIFY_MIX_SCALAR(vcf * 2 , vcf * complex(2)); + VERIFY_MIX_SCALAR(vcf * 2.1 , vcf * complex(2.1)); + VERIFY_MIX_SCALAR(2 * vcf, vcf * complex(2)); + VERIFY_MIX_SCALAR(2.1 * vcf , vcf * complex(2.1)); + // check scalar quotients VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); diff --git a/test/nesting_ops.cpp b/test/nesting_ops.cpp index 2f5025305..a419b0e44 100644 --- a/test/nesting_ops.cpp +++ b/test/nesting_ops.cpp @@ -75,8 +75,8 @@ template void run_nesting_ops_2(const MatrixType& _m) } else { - VERIFY( verify_eval_type<1>(2*m1, 2*m1) ); - VERIFY( verify_eval_type<2>(2*m1, m1) ); + VERIFY( verify_eval_type<2>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<3>(2*m1, m1) ); } VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); VERIFY( verify_eval_type<3>(m1+m1, m1) ); From bf2d5edecc46808b42baeaf3eaced816b34f3364 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 15:35:17 +0200 Subject: [PATCH 82/85] Fix warning. 
--- Eigen/src/misc/RealSvd2x2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/misc/RealSvd2x2.h b/Eigen/src/misc/RealSvd2x2.h index cdd7777d2..dfaaa0b17 100644 --- a/Eigen/src/misc/RealSvd2x2.h +++ b/Eigen/src/misc/RealSvd2x2.h @@ -51,4 +51,4 @@ void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, } // end namespace Eigen -#endif // EIGEN_REALSVD2X2_H \ No newline at end of file +#endif // EIGEN_REALSVD2X2_H From 55fc04e8b5d9f3bf3b82bc4203a1562f0c7637dd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 15:36:42 +0200 Subject: [PATCH 83/85] Fix operator priority --- Eigen/src/Core/CommaInitializer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index 38b1112ff..787743b8f 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -93,7 +93,7 @@ struct CommaInitializer eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() && "Too many rows passed to comma initializer (operator<<)"); } - eigen_assert(m_col Date: Thu, 23 Jun 2016 18:47:31 +0200 Subject: [PATCH 84/85] bug #1241: does not emit anything for empty tensors --- unsupported/Eigen/CXX11/src/Tensor/TensorIO.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h index 58ffaefab..f3a3a1b88 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -26,10 +26,7 @@ struct TensorPrinter { typedef typename internal::remove_const::type Scalar; typedef typename Tensor::Index Index; const Index total_size = internal::array_prod(tensor.dimensions()); - if (total_size == 0) { - os << "Empty tensor of rank " << Rank; - } - else { + if (total_size > 0) { const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); static const int layout = Tensor::Layout; Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); @@ -46,10 +43,7 @@ struct TensorPrinter { typedef typename internal::remove_const::type Scalar; typedef typename Tensor::Index Index; const Index total_size = internal::array_prod(tensor.dimensions()); - if (total_size == 0) { - os << "Empty tensor of rank 1"; - } - else { + if (total_size > 0) { Map > array(const_cast(tensor.data()), total_size); os << array; } From 361dbd246d0b0f0ceff8d6dea6991807cffde821 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 23 Jun 2016 18:54:30 +0200 Subject: [PATCH 85/85] Add unit test for printing empty tensors --- unsupported/test/cxx11_tensor_io.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/unsupported/test/cxx11_tensor_io.cpp b/unsupported/test/cxx11_tensor_io.cpp index 8267dcadd..489960529 100644 --- a/unsupported/test/cxx11_tensor_io.cpp +++ b/unsupported/test/cxx11_tensor_io.cpp @@ -40,6 +40,12 @@ static void test_output_1d() std::string expected("0\n1\n2\n3\n4"); VERIFY_IS_EQUAL(std::string(os.str()), expected); + + Eigen::Tensor empty_tensor(0); + std::stringstream empty_os; + empty_os << empty_tensor; + std::string empty_string; + VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string); }
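
For reference, a minimal sketch of the exclusive-scan semantics introduced in PATCH 67 and exposed through cumsum()/cumprod()/scan(): with exclusive=true, each output coefficient accumulates only the preceding coefficients, starting from the reducer's initial value (0 for sum, 1 for prod). This sketch is not part of the patch series; the tensor size and values are illustrative only, and it assumes the unsupported CXX11 Tensor module built from these patches.

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<float, 1> t(4);
  t.setValues({1.f, 2.f, 3.f, 4.f});

  // Inclusive scan (default): out(i) = t(0) + ... + t(i)        ->  1 3 6 10
  Eigen::Tensor<float, 1> inclusive = t.cumsum(0);

  // Exclusive scan: out(i) = t(0) + ... + t(i-1), with out(0)=0 ->  0 1 3 6
  Eigen::Tensor<float, 1> exclusive = t.cumsum(0, /*exclusive=*/true);

  std::cout << inclusive << "\n\n" << exclusive << std::endl;
  return 0;
}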