From e6b10202218631be755f19c41fe01287b9a37f90 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Tue, 24 Jan 2017 13:55:18 -0800
Subject: [PATCH 1/5] Adds a fast memcpy function to Eigen.

This takes advantage of the following:

1. For small fixed sizes, the compiler generates inline code for memcpy,
   which is much faster.
2. My colleague eriche at googl dot com discovered that for large sizes,
   memmove is significantly faster than memcpy (at least on Linux with GCC
   or Clang). See benchmark numbers measured on a Haswell (HP Z440)
   workstation here:
   https://docs.google.com/a/google.com/spreadsheets/d/1jLs5bKzXwhpTySw65MhG1pZpsIwkszZqQTjwrd_n0ic/pubhtml
   This is of course surprising, since memcpy is a less constrained version
   of memmove. This stackoverflow thread contains some speculation as to the
   causes:
   http://stackoverflow.com/questions/22793669/poor-memcpy-performance-on-linux

Below are numbers for copying and slicing tensors using the multithreaded
TensorDevice. The numbers show significant improvements for memcpy of very
small blocks and for memcpy of large blocks single threaded (we were already
able to saturate memory bandwidth for >1 threads before on large blocks). The
"slicingSmallPieces" benchmark also shows small, consistent improvements,
since memcpy cost is a fair portion of that particular computation.

The benchmarks operate on NxN matrices, and the names are of the form
BM_$OP_${NUMTHREADS}T/${N}.

Measured improvements in wall clock time:

Run on rmlarsen3.mtv (12 X 3501 MHz CPUs); 2017-01-20T11:26:31.493023454-08:00
CPU: Intel Haswell with HyperThreading (6 cores) dL1:32KB dL2:256KB dL3:15MB

Benchmark                        Base (ns)   New (ns)   Improvement
------------------------------------------------------------------
BM_memcpy_1T/2                        3.48       2.39        +31.3%
BM_memcpy_1T/8                        12.3       6.51        +47.0%
BM_memcpy_1T/64                        371        383         -3.2%
BM_memcpy_1T/512                     66922      66720         +0.3%
BM_memcpy_1T/4k                    9892867    6849682        +30.8%
BM_memcpy_1T/5k                   14951099   10332856        +30.9%
BM_memcpy_2T/2                        3.50       2.46        +29.7%
BM_memcpy_2T/8                        12.3       7.66        +37.7%
BM_memcpy_2T/64                        371        376         -1.3%
BM_memcpy_2T/512                     66652      66788         -0.2%
BM_memcpy_2T/4k                    6145012    6117776         +0.4%
BM_memcpy_2T/5k                    9181478    9010942         +1.9%
BM_memcpy_4T/2                        3.47       2.47        +31.0%
BM_memcpy_4T/8                        12.3       6.67        +45.8%
BM_memcpy_4T/64                        374        376         -0.5%
BM_memcpy_4T/512                     67833      68019         -0.3%
BM_memcpy_4T/4k                    5057425    5188253         -2.6%
BM_memcpy_4T/5k                    7555638    7779468         -3.0%
BM_memcpy_6T/2                        3.51       2.50        +28.8%
BM_memcpy_6T/8                        12.3       7.61        +38.1%
BM_memcpy_6T/64                        373        378         -1.3%
BM_memcpy_6T/512                     66871      66774         +0.1%
BM_memcpy_6T/4k                    5112975    5233502         -2.4%
BM_memcpy_6T/5k                    7614180    7772246         -2.1%
BM_memcpy_8T/2                        3.47       2.41        +30.5%
BM_memcpy_8T/8                        12.4       10.5        +15.3%
BM_memcpy_8T/64                        372        388         -4.3%
BM_memcpy_8T/512                     67373      66588         +1.2%
BM_memcpy_8T/4k                    5148462    5254897         -2.1%
BM_memcpy_8T/5k                    7660989    7799058         -1.8%
BM_memcpy_12T/2                       3.50       2.40        +31.4%
BM_memcpy_12T/8                       12.4       7.55        +39.1%
BM_memcpy_12T/64                       374        378         -1.1%
BM_memcpy_12T/512                    67132      66683         +0.7%
BM_memcpy_12T/4k                   5185125    5292920         -2.1%
BM_memcpy_12T/5k                   7717284    7942684         -2.9%
BM_slicingSmallPieces_1T/2            47.3       47.5         +0.4%
BM_slicingSmallPieces_1T/8            53.6       52.3         +2.4%
BM_slicingSmallPieces_1T/64            491        476         +3.1%
BM_slicingSmallPieces_1T/512         21734      18814        +13.4%
BM_slicingSmallPieces_1T/4k         394660     396760         -0.5%
BM_slicingSmallPieces_1T/5k         218722     209244         +4.3%
BM_slicingSmallPieces_2T/2            80.7       79.9         +1.0%
BM_slicingSmallPieces_2T/8            54.2       53.1         +2.0%
BM_slicingSmallPieces_2T/64            497        477         +4.0%
BM_slicingSmallPieces_2T/512         21732      18822        +13.4%
BM_slicingSmallPieces_2T/4k         392885     390490         +0.6%
BM_slicingSmallPieces_2T/5k         221988     208678         +6.0%
BM_slicingSmallPieces_4T/2            80.8       80.1         +0.9%
BM_slicingSmallPieces_4T/8            54.1       53.2         +1.7%
BM_slicingSmallPieces_4T/64            493        476         +3.4%
BM_slicingSmallPieces_4T/512         21702      18758        +13.6%
BM_slicingSmallPieces_4T/4k         393962     404023         -2.6%
BM_slicingSmallPieces_4T/5k         249667     211732        +15.2%
BM_slicingSmallPieces_6T/2            80.5       80.1         +0.5%
BM_slicingSmallPieces_6T/8            54.4       53.4         +1.8%
BM_slicingSmallPieces_6T/64            488        478         +2.0%
BM_slicingSmallPieces_6T/512         21719      18841        +13.3%
BM_slicingSmallPieces_6T/4k         394950     397583         -0.7%
BM_slicingSmallPieces_6T/5k         223080     210148         +5.8%
BM_slicingSmallPieces_8T/2            81.2       80.4         +1.0%
BM_slicingSmallPieces_8T/8            58.1       53.5         +7.9%
BM_slicingSmallPieces_8T/64            489        480         +1.8%
BM_slicingSmallPieces_8T/512         21586      18798        +12.9%
BM_slicingSmallPieces_8T/4k         394592     400165         -1.4%
BM_slicingSmallPieces_8T/5k         219688     208301         +5.2%
BM_slicingSmallPieces_12T/2           80.2       79.8         +0.7%
BM_slicingSmallPieces_12T/8           54.4       53.4         +1.8%
BM_slicingSmallPieces_12T/64           488        476         +2.5%
BM_slicingSmallPieces_12T/512        21931      18831        +14.1%
BM_slicingSmallPieces_12T/4k        393962     396541         -0.7%
BM_slicingSmallPieces_12T/5k        218803     207965         +5.0%
---
 Eigen/src/Core/util/Memory.h                  | 61 +++++++++++++++----
 .../CXX11/src/Tensor/TensorContraction.h      |  2 +-
 .../CXX11/src/Tensor/TensorDeviceDefault.h    |  2 +-
 .../CXX11/src/Tensor/TensorDeviceThreadPool.h |  2 +-
 .../Eigen/CXX11/src/Tensor/TensorFFT.h        |  4 +-
 5 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index c634d7ea0..6b8e307c8 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -63,7 +63,7 @@ namespace Eigen {
 namespace internal {
 
-EIGEN_DEVICE_FUNC
+EIGEN_DEVICE_FUNC
 inline void throw_std_bad_alloc()
 {
 #ifdef EIGEN_EXCEPTIONS
@@ -74,6 +74,41 @@ inline void throw_std_bad_alloc()
 #endif
 }
 
+EIGEN_DEVICE_FUNC
+inline void fast_memcpy(void* dst, const void* src, size_t size) {
+#if defined(__CUDA__) || defined(__ANDROID__)
+  ::memcpy(dst, src, size);
+#else
+  switch(size) {
+    // Most compilers will generate inline code for fixed sizes,
+    // which is significantly faster for small copies.
+    case 1: memcpy(dst, src, 1); break;
+    case 2: memcpy(dst, src, 2); break;
+    case 3: memcpy(dst, src, 3); break;
+    case 4: memcpy(dst, src, 4); break;
+    case 5: memcpy(dst, src, 5); break;
+    case 6: memcpy(dst, src, 6); break;
+    case 7: memcpy(dst, src, 7); break;
+    case 8: memcpy(dst, src, 8); break;
+    case 9: memcpy(dst, src, 9); break;
+    case 10: memcpy(dst, src, 10); break;
+    case 11: memcpy(dst, src, 11); break;
+    case 12: memcpy(dst, src, 12); break;
+    case 13: memcpy(dst, src, 13); break;
+    case 14: memcpy(dst, src, 14); break;
+    case 15: memcpy(dst, src, 15); break;
+    case 16: memcpy(dst, src, 16); break;
+#ifdef EIGEN_OS_LINUX
+    // On Linux, memmove appears to be faster than memcpy for
+    // large sizes, strangely enough.
+    default: memmove(dst, src, size); break;
+#else
+    default: memcpy(dst, src, size); break;
+#endif
+  }
+#endif
+}
+
 /*****************************************************************************
 *** Implementation of handmade aligned functions                          ***
 *****************************************************************************/
@@ -114,7 +149,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
   void *previous_aligned = static_cast(original)+previous_offset;
   if(aligned!=previous_aligned)
     std::memmove(aligned, previous_aligned, size);
-
+
   *(reinterpret_cast(aligned) - 1) = original;
   return aligned;
 }
@@ -142,7 +177,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
 {
   eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
 }
-#else
+#else
 EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
 {}
 #endif
@@ -471,8 +506,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index
 }
 
 /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
- */
-template
+ */
+template
 inline Index first_multiple(Index size, Index base)
 {
   return ((size+base-1)/base)*base;
@@ -502,7 +537,7 @@ template struct smart_copy_helper {
   { std::copy(start, end, target); }
 };
 
-// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
+// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
 template struct smart_memmove_helper;
 
 template void smart_memmove(const T* start, const T* end, T* target)
@@ -522,15 +557,15 @@ template struct smart_memmove_helper {
 template struct smart_memmove_helper {
   static inline void run(const T* start, const T* end, T* target)
-  {
+  {
     if (UIntPtr(target) < UIntPtr(start))
     {
       std::copy(start, end, target);
     }
-    else
+    else
     {
       std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
-      std::copy_backward(start, end, target + count);
+      std::copy_backward(start, end, target + count);
     }
   }
 };
@@ -603,7 +638,7 @@ template void swap(scoped_array &a,scoped_array &b)
 {
   std::swap(a.ptr(),b.ptr());
 }
-
+
 } // end namespace internal
 
 /** \internal
@@ -622,7 +657,7 @@ template void swap(scoped_array &a,scoped_array &b)
   * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
   */
 #ifdef EIGEN_ALLOCA
-
+
   #if EIGEN_DEFAULT_ALIGN_BYTES>0
     // We always manually re-align the result of EIGEN_ALLOCA.
     // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
@@ -645,7 +680,7 @@ template void swap(scoped_array &a,scoped_array &b)
     Eigen::internal::check_size_for_overflow(SIZE); \
     TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
     Eigen::internal::aligned_stack_memory_handler EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
-
+
 #endif
@@ -701,7 +736,7 @@
   * Example:
   * \code
   * // Matrix4f requires 16 bytes alignment:
-  * std::map< int, Matrix4f, std::less,
+  * std::map< int, Matrix4f, std::less,
   *           aligned_allocator > > my_map_mat4;
   * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
   * std::map< int, Vector3f > my_map_vec3;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 442c14fac..39012b937 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -56,7 +56,7 @@ void pack_simple(Scalar * dst, const Scalar * src, Index cols, Index rows, Index
   } else {
     // Naive memcpy calls
     for (Index col = 0; col < cols; ++col) {
-      memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar));
+      internal::fast_memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar));
     }
   }
 }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index ccaaa6cb2..b133781ae 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -22,7 +22,7 @@ struct DefaultDevice {
     internal::aligned_free(buffer);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
-    ::memcpy(dst, src, n);
+    internal::fast_memcpy(dst, src, n);
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
     memcpy(dst, src, n);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 16180ca69..facdea735 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -106,7 +106,7 @@ struct ThreadPoolDevice {
   }
 
   EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
-    ::memcpy(dst, src, n);
+    internal::fast_memcpy(dst, src, n);
   }
   EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
     memcpy(dst, src, n);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
index 08eb5595a..f060191ab 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -253,7 +253,7 @@ struct TensorEvaluator, D
       // get data into line_buf
       const Index stride = m_strides[dim];
       if (stride == 1) {
-        memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
+        m_device.memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
       } else {
         Index offset = base_offset;
         for (int j = 0; j < line_len; ++j, offset += stride) {
@@ -271,7 +271,7 @@ struct TensorEvaluator, D
       // write back
       if (FFTDir == FFT_FORWARD && stride == 1) {
-        memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
+        m_device.memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
       } else {
         Index offset = base_offset;
         const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
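
A minimal sketch of the idea behind fast_memcpy above, for readers skimming
the patch. This is not the Eigen code itself; the function name and the
handful of sizes are illustrative only. The point is that dispatching on a
small runtime size turns each memcpy call into one with a compile-time
constant length, which the compiler expands inline instead of calling libc,
while large copies fall through to memmove (faster than memcpy on Linux per
the numbers above).

    #include <cstring>
    #include <cstddef>

    // Sketch only: a few representative sizes; the patch enumerates 1..16.
    inline void copy_small_or_large(void* dst, const void* src, std::size_t size) {
      switch (size) {
        case 4:  std::memcpy(dst, src, 4);  break;   // inlined: one 32-bit move
        case 8:  std::memcpy(dst, src, 8);  break;   // inlined: one 64-bit move
        case 16: std::memcpy(dst, src, 16); break;   // inlined: one 128-bit move
        default: std::memmove(dst, src, size); break; // large copies
      }
    }
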
From 3be5ee2352423427c95b133ed749f4d9316fe135 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Tue, 24 Jan 2017 14:22:49 -0800
Subject: [PATCH 2/5] Update copy helper to use fast_memcpy.

---
 Eigen/src/Core/util/Memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 6b8e307c8..572b1fe69 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -528,7 +528,7 @@ template struct smart_copy_helper {
     IntPtr size = IntPtr(end)-IntPtr(start);
     if(size==0) return;
     eigen_internal_assert(start!=0 && end!=0 && target!=0);
-    memcpy(target, start, size);
+    fast_memcpy(target, start, size);
   }
 };

From d06a48959abac6369336d3873d46aee78f8fbec2 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Wed, 25 Jan 2017 15:27:13 +0100
Subject: [PATCH 3/5] bug #1383: Fix regression from 3.2 with LinSpaced(n,0,n-1) with n==0.

---
 Eigen/src/Core/functors/NullaryFunctors.h |  2 +-
 test/nullary.cpp                          | 24 +++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index 0311d9035..60c5f7f2a 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -93,7 +93,7 @@ struct linspaced_op_impl
   linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
     m_low(low),
     m_multiplier((high-low)/convert_index(num_steps<=1 ? 1 : num_steps-1)),
-    m_divisor(convert_index(num_steps+high-low)/(high-low+1)),
+    m_divisor(convert_index(num_steps+high-low)/((high-low+1)==0?1:(high-low+1))),
     m_use_divisor((high+1)<(low+num_steps))
   {}
diff --git a/test/nullary.cpp b/test/nullary.cpp
index 351d26e74..bb0cea937 100644
--- a/test/nullary.cpp
+++ b/test/nullary.cpp
@@ -152,6 +152,30 @@ void testVectorType(const VectorType& base)
     m.tail(size-1).setLinSpaced(low, high);
     VERIFY_IS_APPROX(m(size-1), high);
   }
+
+  // regression test for bug 1383 (LinSpaced with empty size/range)
+  {
+    Index n0 = VectorType::SizeAtCompileTime==Dynamic ? 0 : VectorType::SizeAtCompileTime;
+    low = internal::random();
+    m = VectorType::LinSpaced(n0,low,low-1);
+    VERIFY(m.size()==n0);
+
+    if(VectorType::SizeAtCompileTime==Dynamic)
+    {
+      VERIFY_IS_EQUAL(VectorType::LinSpaced(n0,0,Scalar(n0-1)).sum(),Scalar(0));
+      VERIFY_IS_EQUAL(VectorType::LinSpaced(n0,low,low-1).sum(),Scalar(0));
+    }
+
+    m.setLinSpaced(n0,0,Scalar(n0-1));
+    VERIFY(m.size()==n0);
+    m.setLinSpaced(n0,low,low-1);
+    VERIFY(m.size()==n0);
+
+    // empty range only:
+    VERIFY_IS_APPROX(VectorType::LinSpaced(size,low,low),VectorType::Constant(size,low));
+    m.setLinSpaced(size,low,low);
+    VERIFY_IS_APPROX(m,VectorType::Constant(size,low));
+  }
 }

 template

From 296d24be4dd3c700089d1d9182a843c60d68019c Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Wed, 25 Jan 2017 17:39:01 +0100
Subject: [PATCH 4/5] bug #1381: fix sparse.diagonal() used as an rvalue.

The problem was that if "sparse" is not const, then sparse.diagonal() must
have the LvalueBit flag, meaning that sparse.diagonal().coeff(i) must return
a const reference, const Scalar&. However, sparse::coeff() cannot return a
reference for a non-existing zero coefficient. The trick is to return a
reference to a local member of the evaluator.
---
 Eigen/src/Core/CoreEvaluators.h             |  4 +--
 Eigen/src/Core/util/XprHelper.h             |  2 +-
 Eigen/src/SparseCore/SparseCompressedBase.h | 38 +++++++++++++++------
 test/sparse_basic.cpp                       |  4 +++
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 24fc7835b..412f5a661 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -1613,9 +1613,7 @@ struct evaluator >
   {
   }
 
   typedef typename XprType::Scalar Scalar;
-  // FIXME having to check whether ArgType is sparse here i not very nice.
-  typedef typename internal::conditional::value,
-                                          typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index) const
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index b94dd61ff..d5e565cb8 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -638,7 +638,7 @@ struct plain_constant_type
 template struct is_lvalue
 {
-  enum { value = !bool(is_const::value) &&
+  enum { value = (!bool(is_const::value)) &&
                  bool(traits::Flags & LvalueBit) };
 };
diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h
index d32a8df40..e0b3c22b6 100644
--- a/Eigen/src/SparseCore/SparseCompressedBase.h
+++ b/Eigen/src/SparseCore/SparseCompressedBase.h
@@ -295,11 +295,11 @@ struct evaluator >
     Flags = Derived::Flags
   };
 
-  evaluator() : m_matrix(0)
+  evaluator() : m_matrix(0), m_zero(0)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
-  explicit evaluator(const Derived &mat) : m_matrix(&mat)
+  explicit evaluator(const Derived &mat) : m_matrix(&mat), m_zero(0)
   {
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
@@ -312,26 +312,42 @@ struct evaluator >
   operator const Derived&() const { return *m_matrix; }
 
   typedef typename DenseCoeffsBase::CoeffReturnType CoeffReturnType;
-  Scalar coeff(Index row, Index col) const
-  { return m_matrix->coeff(row,col); }
-
+  const Scalar& coeff(Index row, Index col) const
+  {
+    Index p = find(row,col);
+
+    if(p==Dynamic)
+      return m_zero;
+    else
+      return m_matrix->const_cast_derived().valuePtr()[p];
+  }
 
   Scalar& coeffRef(Index row, Index col)
   {
+    Index p = find(row,col);
+    eigen_assert(p!=Dynamic && "written coefficient does not exist");
+    return m_matrix->const_cast_derived().valuePtr()[p];
+  }
+
+protected:
+
+  Index find(Index row, Index col) const
+  {
     eigen_internal_assert(row>=0 && rowrows() && col>=0 && colcols());
-
+
     const Index outer = Derived::IsRowMajor ? row : col;
     const Index inner = Derived::IsRowMajor ? col : row;
 
     Index start = m_matrix->outerIndexPtr()[outer];
     Index end = m_matrix->isCompressed() ? m_matrix->outerIndexPtr()[outer+1] : m_matrix->outerIndexPtr()[outer] + m_matrix->innerNonZeroPtr()[outer];
-    eigen_assert(end>start && "you are using a non finalized sparse matrix or written coefficient does not exist");
-    const Index p = std::lower_bound(m_matrix->innerIndexPtr()+start, m_matrix->innerIndexPtr()+end,inner) - m_matrix->innerIndexPtr();
-    eigen_assert((pinnerIndexPtr()[p]==inner) && "written coefficient does not exist");
-    return m_matrix->const_cast_derived().valuePtr()[p];
+    eigen_assert(end>=start && "you are using a non finalized sparse matrix or written coefficient does not exist");
+    const Index p = std::lower_bound(m_matrix->innerIndexPtr()+start, m_matrix->innerIndexPtr()+end,inner) - m_matrix->innerIndexPtr();
+
+    return ((pinnerIndexPtr()[p]==inner)) ? p : Dynamic;
   }
 
   const Derived *m_matrix;
+  const Scalar m_zero;
 };
 
 }
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index 208a66f12..91b7cb335 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -485,6 +485,10 @@ template void sparse_basic(const SparseMatrixType& re
     SparseMatrixType m2(rows, cols);
     initSparse(density, refMat2, m2);
     VERIFY_IS_APPROX(m2.diagonal(), refMat2.diagonal().eval());
+    DenseVector d = m2.diagonal();
+    VERIFY_IS_APPROX(d, refMat2.diagonal().eval());
+    d = m2.diagonal().array();
+    VERIFY_IS_APPROX(d, refMat2.diagonal().eval());
     VERIFY_IS_APPROX(const_cast(m2).diagonal(), refMat2.diagonal().eval());
 
     initSparse(density, refMat2, m2, ForceNonZeroDiag);
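
A minimal usage sketch of the situation patch 4 addresses, mirroring the new
test above. The function name is illustrative; the Eigen calls are standard
3.3-era API. With a non-const sparse matrix, diagonal() is an lvalue
expression, so reading it coefficient-wise must work even though most
diagonal entries are not stored; the fixed evaluator returns a reference to
its internal zero (m_zero) for those.

    #include <Eigen/Dense>
    #include <Eigen/SparseCore>

    void sparse_diagonal_as_rvalue() {
      Eigen::SparseMatrix<double> m(3, 3);
      m.insert(1, 1) = 2.0;              // only one stored diagonal entry
      m.makeCompressed();
      Eigen::VectorXd d = m.diagonal();  // (0,0) and (2,2) are read as zeros
    }
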
From 850ca961d28df99a0ba44bd8bf034ac08e39686e Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Wed, 25 Jan 2017 18:13:53 +0100
Subject: [PATCH 5/5] bug #1383: fix regression in LinSpaced for integers and high

   linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
     m_low(low),
     m_multiplier((high-low)/convert_index(num_steps<=1 ? 1 : num_steps-1)),
-    m_divisor(convert_index(num_steps+high-low)/((high-low+1)==0?1:(high-low+1))),
-    m_use_divisor((high+1)<(low+num_steps))
+    m_divisor(convert_index((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),
+    m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)
diff --git a/test/nullary.cpp b/test/nullary.cpp
index bb0cea937..acd55506e 100644
--- a/test/nullary.cpp
+++ b/test/nullary.cpp
@@ -175,6 +175,21 @@ void testVectorType(const VectorType& base)
     VERIFY_IS_APPROX(VectorType::LinSpaced(size,low,low),VectorType::Constant(size,low));
     m.setLinSpaced(size,low,low);
     VERIFY_IS_APPROX(m,VectorType::Constant(size,low));
+
+    if(NumTraits::IsInteger)
+    {
+      VERIFY_IS_APPROX( VectorType::LinSpaced(size,low,Scalar(low+size-1)), VectorType::LinSpaced(size,Scalar(low+size-1),low).reverse() );
+
+      if(VectorType::SizeAtCompileTime==Dynamic)
+      {
+        // Check negative multiplicator path:
+        for(Index k=1; k<5; ++k)
+          VERIFY_IS_APPROX( VectorType::LinSpaced(size,low,Scalar(low+(size-1)*k)), VectorType::LinSpaced(size,Scalar(low+(size-1)*k),low).reverse() );
+        // Check negative divisor path:
+        for(Index k=1; k<5; ++k)
+          VERIFY_IS_APPROX( VectorType::LinSpaced(size*k,low,Scalar(low+size-1)), VectorType::LinSpaced(size*k,Scalar(low+size-1),low).reverse() );
+      }
+    }
   }
 }
@@ -222,7 +237,8 @@ void test_nullary()
     CALL_SUBTEST_8( testVectorType(Matrix()) );
     CALL_SUBTEST_8( testVectorType(Matrix()) );
 
-    CALL_SUBTEST_9( testVectorType(VectorXi(internal::random(1,300))) );
+    CALL_SUBTEST_9( testVectorType(VectorXi(internal::random(1,10))) );
+    CALL_SUBTEST_9( testVectorType(VectorXi(internal::random(9,300))) );
     CALL_SUBTEST_9( testVectorType(Matrix()) );
   }
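
A minimal usage sketch of the LinSpaced cases exercised by patches 3 and 5,
matching the tests above. The function name is illustrative; the calls are
standard Eigen API. An empty range must not trigger a division by zero, and
an integer range counting downwards must mirror the corresponding upward one.

    #include <Eigen/Dense>

    void linspaced_cases() {
      // LinSpaced(n,0,n-1) with n==0: the empty case fixed by patch 3.
      Eigen::VectorXi empty = Eigen::VectorXi::LinSpaced(0, 0, -1);
      // Ascending and descending integer ranges; after patch 5 the
      // descending one equals the ascending one reversed.
      Eigen::VectorXi up   = Eigen::VectorXi::LinSpaced(5, 0, 4);  // 0 1 2 3 4
      Eigen::VectorXi down = Eigen::VectorXi::LinSpaced(5, 4, 0);  // 4 3 2 1 0
    }
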