diff --git a/Eigen/Core b/Eigen/Core index e6dbe3a8e..f87f3d2f6 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -318,6 +318,7 @@ using std::ptrdiff_t; #include "src/Core/PlainObjectBase.h" #include "src/Core/Matrix.h" #include "src/Core/Array.h" +#include "src/Core/Fill.h" #include "src/Core/CwiseTernaryOp.h" #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index f40b2f433..e70d5551a 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -737,20 +737,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_dense_assignment dense_assignment_loop::run(kernel); } -// Specialization for filling the destination with a constant value. -#if !EIGEN_COMP_MSVC -#ifndef EIGEN_GPU_COMPILE_PHASE -template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop( - DstXprType& dst, - const Eigen::CwiseNullaryOp, DstXprType>& src, - const internal::assign_op& func) { - resize_if_allowed(dst, src, func); - std::fill_n(dst.data(), dst.size(), src.functor()()); -} -#endif -#endif - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) { call_dense_assignment_loop(dst, src, internal::assign_op()); diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 39c33cf03..86ddd5e4d 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -343,7 +343,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) { - return derived() = Constant(rows(), cols(), val); + internal::eigen_fill_impl::run(derived(), val); + return derived(); } /** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val. @@ -547,7 +548,8 @@ EIGEN_DEVICE_FUNC bool DenseBase::isZero(const RealScalar& prec) const */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setZero() { - return setConstant(Scalar(0)); + internal::eigen_zero_impl::run(derived()); + return derived(); } /** Resizes to the given \a size, and sets all coefficients in this expression to zero. @@ -562,7 +564,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setZero() { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index newSize) { resize(newSize); - return setConstant(Scalar(0)); + return setZero(); } /** Resizes to the given size, and sets all coefficients in this expression to zero. @@ -578,7 +580,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index rows, Index cols) { resize(rows, cols); - return setConstant(Scalar(0)); + return setZero(); } /** Resizes to the given size, changing only the number of columns, and sets all diff --git a/Eigen/src/Core/Fill.h b/Eigen/src/Core/Fill.h new file mode 100644 index 000000000..048b51d98 --- /dev/null +++ b/Eigen/src/Core/Fill.h @@ -0,0 +1,94 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charles Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FILL_H +#define EIGEN_FILL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct eigen_fill_helper : std::false_type {}; + +template +struct eigen_fill_helper> : std::true_type {}; + +template +struct eigen_fill_helper> : std::true_type {}; + +template +struct eigen_fill_helper> : eigen_fill_helper {}; + +template +struct eigen_fill_helper> + : std::integral_constant::value && + (Xpr::IsRowMajor ? (BlockRows == 1) : (BlockCols == 1))> {}; + +template +struct eigen_fill_helper> + : std::integral_constant::value && + (evaluator>::Flags & LinearAccessBit)> {}; + +template ::value> +struct eigen_fill_impl { + using Scalar = typename Xpr::Scalar; + using Func = scalar_constant_op; + using PlainObject = typename Xpr::PlainObject; + using Constant = CwiseNullaryOp; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) { + dst = Constant(dst.rows(), dst.cols(), Func(val)); + } +}; + +#if !EIGEN_COMP_MSVC +#ifndef EIGEN_GPU_COMPILE_PHASE +template +struct eigen_fill_impl { + using Scalar = typename Xpr::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) { + EIGEN_USING_STD(fill_n); + fill_n(dst.data(), dst.size(), val); + } +}; +#endif +#endif + +template +struct eigen_memset_helper { + static constexpr bool value = std::is_trivial::value && eigen_fill_helper::value; +}; + +template ::value> +struct eigen_zero_impl { + using Scalar = typename Xpr::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { eigen_fill_impl::run(dst, Scalar(0)); } +}; + +template +struct eigen_zero_impl { + using Scalar = typename Xpr::Scalar; + static constexpr size_t max_bytes = (std::numeric_limits::max)(); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { + const size_t num_bytes = dst.size() * sizeof(Scalar); +#ifndef EIGEN_NO_DEBUG + if (num_bytes > max_bytes) throw_std_bad_alloc(); +#endif + EIGEN_USING_STD(memset); + memset(dst.data(), 0, num_bytes); + } +}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_FILL_H diff --git a/test/block.cpp b/test/block.cpp index c67dd225a..8f7c8de68 100644 --- a/test/block.cpp +++ b/test/block.cpp @@ -73,6 +73,21 @@ void block(const MatrixType& m) { block_real_only(m1, r1, r2, c1, c1, s1); + // test fill logic with innerpanel and non-innerpanel blocks + m1.row(r1).setConstant(s1); + VERIFY_IS_CWISE_EQUAL(m1.row(r1), DynamicVectorType::Constant(cols, s1).transpose()); + m1 = m1_copy; + m1.col(c1).setConstant(s1); + VERIFY_IS_CWISE_EQUAL(m1.col(c1), DynamicVectorType::Constant(rows, s1)); + m1 = m1_copy; + // test setZero logic with innerpanel and non-innerpanel blocks + m1.row(r1).setZero(); + VERIFY_IS_CWISE_EQUAL(m1.row(r1), DynamicVectorType::Zero(cols).transpose()); + m1 = m1_copy; + m1.col(c1).setZero(); + VERIFY_IS_CWISE_EQUAL(m1.col(c1), DynamicVectorType::Zero(rows)); + m1 = m1_copy; + // check row() and col() VERIFY_IS_EQUAL(m1.col(c1).transpose(), m1.transpose().row(c1)); // check operator(), both constant and non-constant, on row() and col() diff --git a/test/mapped_matrix.cpp b/test/mapped_matrix.cpp index a8d17d900..78ae09cb0 100644 --- a/test/mapped_matrix.cpp +++ b/test/mapped_matrix.cpp @@ -75,8 +75,23 @@ void map_class_matrix(const MatrixType& m) { Map map4(array4, rows, cols); VERIFY_IS_EQUAL(map1, MatrixType::Ones(rows, cols)); + map1.setConstant(s1); + VERIFY_IS_EQUAL(map1, MatrixType::Constant(rows, cols, s1)); + map1.setZero(); + VERIFY_IS_EQUAL(map1, MatrixType::Zero(rows, cols)); + VERIFY_IS_EQUAL(map2, MatrixType::Ones(rows, cols)); + map2.setConstant(s1); + VERIFY_IS_EQUAL(map2, MatrixType::Constant(rows, cols, s1)); + map2.setZero(); + VERIFY_IS_EQUAL(map2, MatrixType::Zero(rows, cols)); + VERIFY_IS_EQUAL(map3, MatrixType::Ones(rows, cols)); + map3.setConstant(s1); + VERIFY_IS_EQUAL(map3, MatrixType::Constant(rows, cols, s1)); + map3.setZero(); + VERIFY_IS_EQUAL(map3, MatrixType::Zero(rows, cols)); + map1 = MatrixType::Random(rows, cols); map2 = map1; map3 = map1; diff --git a/test/mapstride.cpp b/test/mapstride.cpp index 224316f88..1a9f50c59 100644 --- a/test/mapstride.cpp +++ b/test/mapstride.cpp @@ -94,6 +94,8 @@ void map_class_matrix(const MatrixType& _m) { VERIFY_IS_APPROX(s1 * map, s1 * m); map *= s1; VERIFY_IS_APPROX(map, s1 * m); + map.setZero(); + VERIFY_IS_CWISE_EQUAL(map, MatrixType::Zero(rows, cols)); } // test no inner stride and an outer stride of +4. This is quite important as for fixed-size matrices, @@ -118,6 +120,8 @@ void map_class_matrix(const MatrixType& _m) { VERIFY_IS_APPROX(s1 * map, s1 * m); map *= s1; VERIFY_IS_APPROX(map, s1 * m); + map.setZero(); + VERIFY_IS_CWISE_EQUAL(map, MatrixType::Zero(rows, cols)); } // test both inner stride and outer stride @@ -138,6 +142,8 @@ void map_class_matrix(const MatrixType& _m) { VERIFY_IS_APPROX(s1 * map, s1 * m); map *= s1; VERIFY_IS_APPROX(map, s1 * m); + map.setZero(); + VERIFY_IS_CWISE_EQUAL(map, MatrixType::Zero(rows, cols)); } // test inner stride and no outer stride @@ -156,6 +162,8 @@ void map_class_matrix(const MatrixType& _m) { VERIFY_IS_APPROX(s1 * map, s1 * m); map *= s1; VERIFY_IS_APPROX(map, s1 * m); + map.setZero(); + VERIFY_IS_CWISE_EQUAL(map, MatrixType::Zero(rows, cols)); } // test negative strides