// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2023 Charlie Schlosser // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #define EIGEN_USE_THREADS 1 #include "main.h" #include namespace Eigen { namespace internal { // conveniently control vectorization logic template struct scalar_dummy_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar& a) const { return a; } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return a; } }; template struct functor_traits > { enum { Cost = 1'000'000, PacketAccess = Vectorize && packet_traits::Vectorizable }; }; } // namespace internal } // namespace Eigen template void test_threaded_assignment(const PlainObject&, Index rows = PlainObject::RowsAtCompileTime, Index cols = PlainObject::ColsAtCompileTime) { using Scalar = typename PlainObject::Scalar; using VectorizationOff = internal::scalar_dummy_op; using VectorizationOn = internal::scalar_dummy_op; int threads = 4; ThreadPool pool(threads); CoreThreadPoolDevice threadPoolDevice(pool); PlainObject dst(rows, cols), ref(rows, cols), rhs(rows, cols); rhs.setRandom(); const auto rhs_xpr = rhs.cwiseAbs2(); // linear access dst.setRandom(); ref.setRandom(); ref = rhs_xpr.unaryExpr(VectorizationOff()); dst.device(threadPoolDevice) = rhs_xpr.unaryExpr(VectorizationOff()); VERIFY_IS_CWISE_EQUAL(ref, dst); ref = rhs_xpr.unaryExpr(VectorizationOn()); dst.device(threadPoolDevice) = rhs_xpr.unaryExpr(VectorizationOn()); VERIFY_IS_CWISE_EQUAL(ref, dst); // outer-inner access Index blockRows = numext::maxi(Index(1), rows - 1); Index blockCols = numext::maxi(Index(1), cols - 1); dst.setRandom(); ref.setRandom(); ref.bottomRightCorner(blockRows, blockCols) = rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOff()); dst.bottomRightCorner(blockRows, blockCols).device(threadPoolDevice) = rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOff()); VERIFY_IS_CWISE_EQUAL(ref.bottomRightCorner(blockRows, blockCols), dst.bottomRightCorner(blockRows, blockCols)); ref.setZero(); dst.setZero(); ref.bottomRightCorner(blockRows, blockCols) = rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOn()); dst.bottomRightCorner(blockRows, blockCols).device(threadPoolDevice) = rhs_xpr.bottomRightCorner(blockRows, blockCols).unaryExpr(VectorizationOn()); VERIFY_IS_CWISE_EQUAL(ref.bottomRightCorner(blockRows, blockCols), dst.bottomRightCorner(blockRows, blockCols)); } EIGEN_DECLARE_TEST(test) { for (int i = 0; i < g_repeat; i++) { CALL_SUBTEST(test_threaded_assignment(MatrixXd(), 123, 123)); CALL_SUBTEST(test_threaded_assignment(Matrix())); } }