diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 48184cc6a..299b072d3 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -877,13 +877,176 @@ struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus>
   // no value, error at compile time
 };
 
-template<typename Scalar>
-struct random_default_impl<Scalar, false, true>
-{
-  static inline Scalar run(const Scalar& x, const Scalar& y)
-  {
-    if (y <= x)
-      return x;
+// Portable fallback that counts leading/trailing zero bits of an unsigned
+// integer with a binary search over shift widths. Partial specializations
+// further down replace it with compiler intrinsics where available.
+// Both counters return the bit width of BitsType when bits == 0.
+template <typename BitsType, typename EnableIf = void>
+struct count_bits_impl {
+  static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
+                "BitsType must be an unsigned integer");
+
+  // Number of zero bits above the most significant set bit.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    int n = CHAR_BIT * sizeof(BitsType);
+    int shift = n / 2;
+    // Binary search: if anything survives a right shift, the top set bit is
+    // in the upper part, so `shift` low positions are ruled out of the count.
+    while (bits > 0 && shift > 0) {
+      BitsType y = bits >> shift;
+      if (y > 0) {
+        n -= shift;
+        bits = y;
+      }
+      shift /= 2;
+    }
+    // shift only reaches 0 for non-zero input; discount the set bit itself.
+    if (shift == 0) {
+      --n;
+    }
+    return n;
+  }
+
+  // Number of zero bits below the least significant set bit.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    int n = CHAR_BIT * sizeof(BitsType);
+    int shift = n / 2;
+    // Mirror image of clz: shift left so the lowest set bit moves upward.
+    while (bits > 0 && shift > 0) {
+      BitsType y = bits << shift;
+      if (y > 0) {
+        n -= shift;
+        bits = y;
+      }
+      shift /= 2;
+    }
+    if (shift == 0) {
+      --n;
+    }
+    return n;
+  }
+};
+
+// Count leading zeros. Returns the bit width of BitsType for bits == 0.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+  return count_bits_impl<BitsType>::clz(bits);
+}
+
+// Count trailing zeros. Returns the bit width of BitsType for bits == 0.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+  return count_bits_impl<BitsType>::ctz(bits);
+}
+
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+
+// GCC/Clang: __builtin_clz*/__builtin_ctz* are undefined for a zero argument,
+// so every specialization answers bits == 0 itself.
+
+// Types that fit in unsigned int.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned int)>> {
+  static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // The builtin counts within unsigned int; drop the bits BitsType lacks.
+    static const int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;
+    return bits == 0 ? kNumBits : __builtin_clz(static_cast<unsigned int>(bits)) - kLeadingBitsOffset;
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctz(static_cast<unsigned int>(bits));
+  }
+};
+
+// Types wider than unsigned int that fit in unsigned long.
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>> {
+  static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static const int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;
+    return bits == 0 ? kNumBits : __builtin_clzl(static_cast<unsigned long>(bits)) - kLeadingBitsOffset;
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctzl(static_cast<unsigned long>(bits));
+  }
+};
+
+// Types wider than unsigned long that fit in unsigned long long.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) &&
+                                                  sizeof(BitsType) <= sizeof(unsigned long long)>> {
+  static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static const int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;
+    return bits == 0 ? kNumBits : __builtin_clzll(static_cast<unsigned long long>(bits)) - kLeadingBitsOffset;
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctzll(static_cast<unsigned long long>(bits));
+  }
+};
+
+#elif EIGEN_COMP_MSVC
+
+// MSVC: _BitScanReverse/_BitScanForward report the *index* of the highest /
+// lowest set bit (counted from bit 0) and find nothing for a zero input, so
+// the reported index is only used when bits != 0.
+
+// Types that fit in unsigned long.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned long)>> {
+  static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    unsigned long out;
+    _BitScanReverse(&out, static_cast<unsigned long>(bits));
+    // Convert the most-significant-bit index into a leading-zero count.
+    return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    unsigned long out;
+    _BitScanForward(&out, static_cast<unsigned long>(bits));
+    return bits == 0 ? kNumBits : static_cast<int>(out);
+  }
+};
+
+#ifdef _WIN64
+
+// Types wider than unsigned long that fit in __int64 (64-bit targets only).
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>> {
+  static const int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    unsigned long out;
+    _BitScanReverse64(&out, static_cast<unsigned __int64>(bits));
+    // Convert the most-significant-bit index into a leading-zero count.
+    return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    unsigned long out;
+    _BitScanForward64(&out, static_cast<unsigned __int64>(bits));
+    return bits == 0 ? kNumBits : static_cast<int>(out);
+  }
+};
+
+#endif  // _WIN64
+
+#endif  // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+
+template <typename Scalar>
+struct random_default_impl<Scalar, false, true> {
+  static inline Scalar run(const Scalar& x, const Scalar& y) {
+    if (y <= x) return x;
     // ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
     typedef typename make_unsigned<Scalar>::type ScalarU;
     // ScalarX is the widest of ScalarU and unsigned int.
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4aff37797..dbd4bc618 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -48,7 +48,7 @@ if(CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK) set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ") - + ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ") @@ -61,7 +61,7 @@ if(UMFPACK_FOUND AND EIGEN_BUILD_BLAS) set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ") - + ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ") @@ -74,7 +74,7 @@ if(KLU_FOUND AND EIGEN_BUILD_BLAS) set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ") - + ei_add_test(klu_support "" "${KLU_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ") @@ -87,7 +87,7 @@ if(SuperLU_FOUND AND EIGEN_BUILD_BLAS) set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ") - + ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ") @@ -160,6 +160,7 @@ endif() set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official") add_custom_target(BuildOfficial) +ei_add_test(clz) ei_add_test(rand) ei_add_test(meta) ei_add_test(numext) @@ -383,7 +384,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE 
"-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") if(EIGEN_TEST_CUDA_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") @@ -401,11 +402,11 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") endif() - + set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") - + ei_add_test(gpu_basic) - + unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() @@ -418,8 +419,8 @@ if (EIGEN_TEST_HIP) set(HIP_PATH "/opt/rocm/hip" CACHE STRING "Path to the HIP installation.") if (EXISTS ${HIP_PATH}) - - list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake) + + list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake) find_package(HIP REQUIRED) if (HIP_FOUND) @@ -433,12 +434,12 @@ if (EIGEN_TEST_HIP) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") ei_add_test(gpu_basic) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) - + elseif ((${HIP_PLATFORM} STREQUAL "nvcc") OR (${HIP_PLATFORM} STREQUAL "nvidia")) message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen") else () message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}") - endif() + endif() endif() else () message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but the specified HIP_PATH (${HIP_PATH}) does not exist") diff --git a/test/clz.cpp b/test/clz.cpp new file mode 100644 index 000000000..1d08b4715 --- /dev/null +++ b/test/clz.cpp @@ -0,0 +1,74 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2023 The Eigen Authors +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+// Reference implementation: scans from the most significant bit downward.
+template <typename T>
+int ref_clz(T val) {
+  static const int kNumBits = sizeof(T) * CHAR_BIT;
+  T kMsbMask = T(1) << (kNumBits - 1);
+  int z = 0;
+  for (; z < kNumBits && ((val & kMsbMask) == 0); ++z) {
+    val <<= 1;
+  }
+  return z;
+}
+
+// Reference implementation: scans from the least significant bit upward.
+template <typename T>
+int ref_ctz(T val) {
+  static const int kNumBits = sizeof(T) * CHAR_BIT;
+  T kLsbMask = T(1);
+  int z = 0;
+  for (; z < kNumBits && ((val & kLsbMask) == 0); ++z) {
+    val >>= 1;
+  }
+  return z;
+}
+
+// Checks Eigen's clz/ctz against the bit-by-bit references for one value.
+template <typename T>
+void verify_clz_ctz(T val) {
+  VERIFY(ref_clz(val) == Eigen::internal::clz(val));
+  VERIFY(ref_ctz(val) == Eigen::internal::ctz(val));
+}
+
+// Sweeps the value range of T: exhaustively for types of at most 16 bits,
+// otherwise in evenly spaced steps, and finishes with the all-ones value.
+template <typename T>
+void test_clz_ctz() {
+  T step = sizeof(T) <= 2 ? 1 : (Eigen::NumTraits<T>::highest() / (T(1) << 16));
+  T iters = Eigen::NumTraits<T>::highest() / step;
+  for (T i = 0; i < iters; ++i) {
+    verify_clz_ctz<T>(i * step);
+  }
+  verify_clz_ctz<T>(Eigen::NumTraits<T>::highest());
+}
+
+// Checks 2^20 random values of type T.
+template <typename T>
+void test_clz_ctz_random() {
+  for (int i = 0; i < 1024 * 1024; ++i) {
+    verify_clz_ctz<T>(Eigen::internal::random<T>());
+  }
+}
+
+EIGEN_DECLARE_TEST(clz) {
+  CALL_SUBTEST_1(test_clz_ctz<uint8_t>());
+  CALL_SUBTEST_2(test_clz_ctz<uint16_t>());
+  CALL_SUBTEST_3(test_clz_ctz<uint32_t>());
+  CALL_SUBTEST_4(test_clz_ctz<uint64_t>());
+
+  // Random sampling is repeated g_repeat times for the wide types.
+  for (int i = 0; i < g_repeat; i++) {
+    test_clz_ctz_random<uint32_t>();
+    test_clz_ctz_random<uint64_t>();
+  }
+}