Add internal ctz/clz implementation.

This commit is contained in:
Antonio Sánchez 2023-12-11 21:03:09 +00:00
parent 454f89af9d
commit 75e273afcc
3 changed files with 227 additions and 9 deletions

View File

@ -628,6 +628,149 @@ struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus> {
// no value, error at compile time
};
// Portable fallback for counting leading/trailing zero bits of an unsigned
// integer. Both routines narrow down the position of the outermost set bit by
// repeatedly halving a shift width, so they need one step per halving of the
// bit width rather than one step per bit.
template <typename BitsType, typename EnableIf = void>
struct count_bits_impl {
  static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
                "BitsType must be an unsigned integer");

  // Number of zero bits above the most significant set bit.
  // Returns the full bit width of BitsType when `bits` is zero.
  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
    int zeros = CHAR_BIT * sizeof(BitsType);
    if (bits == 0) {
      return zeros;
    }
    for (int width = zeros / 2; width > 0; width /= 2) {
      // If anything survives the right shift, the top set bit lives in the
      // upper part: keep only that part and discount the bits skipped below it.
      const BitsType upper = bits >> width;
      if (upper > 0) {
        zeros -= width;
        bits = upper;
      }
    }
    // `bits` has been reduced to its single top bit; account for that bit.
    return zeros - 1;
  }

  // Number of zero bits below the least significant set bit.
  // Returns the full bit width of BitsType when `bits` is zero.
  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
    int zeros = CHAR_BIT * sizeof(BitsType);
    if (bits == 0) {
      return zeros;
    }
    for (int width = zeros / 2; width > 0; width /= 2) {
      // Mirror image of clz: shift left and let the conversion back to
      // BitsType drop whatever is pushed past the top.
      const BitsType lower = bits << width;
      if (lower > 0) {
        zeros -= width;
        bits = lower;
      }
    }
    // The lowest set bit now sits in the top position; account for that bit.
    return zeros - 1;
  }
};
// Counts the leading zero bits of `bits` by forwarding to the
// count_bits_impl selected for BitsType.
template <typename BitsType>
EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
  using Impl = count_bits_impl<BitsType>;
  return Impl::clz(bits);
}
// Counts the trailing zero bits of `bits` by forwarding to the
// count_bits_impl selected for BitsType.
template <typename BitsType>
EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
  using Impl = count_bits_impl<BitsType>;
  return Impl::ctz(bits);
}
#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
// GCC/Clang specialization for integer types no wider than `unsigned int`,
// built on __builtin_clz / __builtin_ctz.
template <typename BitsType>
struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned int)>> {
  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");

  // Leading-zero count measured within BitsType's own width; kNumBits for zero.
  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
    // Zero is answered directly and never reaches the builtin.
    if (bits == 0) {
      return kNumBits;
    }
    // The argument is widened to `unsigned int`, so the bits the widening adds
    // above BitsType are subtracted from the builtin's answer.
    static constexpr int kWideningBits = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;
    const int wide_zeros = __builtin_clz(static_cast<unsigned int>(bits));
    return wide_zeros - kWideningBits;
  }

  // Trailing-zero count; kNumBits for zero.
  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
    if (bits == 0) {
      return kNumBits;
    }
    return __builtin_ctz(static_cast<unsigned int>(bits));
  }
};
// GCC/Clang specialization for integer types wider than `unsigned int` but no
// wider than `unsigned long`, built on __builtin_clzl / __builtin_ctzl.
template <typename BitsType>
struct count_bits_impl<
    BitsType, std::enable_if_t<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>> {
  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");

  // Leading-zero count measured within BitsType's own width; kNumBits for zero.
  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
    // Zero is answered directly and never reaches the builtin.
    if (bits == 0) {
      return kNumBits;
    }
    // The argument is widened to `unsigned long`, so the bits the widening
    // adds above BitsType are subtracted from the builtin's answer.
    static constexpr int kWideningBits = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;
    const int wide_zeros = __builtin_clzl(static_cast<unsigned long>(bits));
    return wide_zeros - kWideningBits;
  }

  // Trailing-zero count; kNumBits for zero.
  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
    if (bits == 0) {
      return kNumBits;
    }
    return __builtin_ctzl(static_cast<unsigned long>(bits));
  }
};
// GCC/Clang specialization for integer types wider than `unsigned long` but no
// wider than `unsigned long long`, built on __builtin_clzll / __builtin_ctzll.
template <typename BitsType>
struct count_bits_impl<BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) &&
                                                  sizeof(BitsType) <= sizeof(unsigned long long)>> {
  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");

  // Leading-zero count measured within BitsType's own width; kNumBits for zero.
  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
    // Zero is answered directly and never reaches the builtin.
    if (bits == 0) {
      return kNumBits;
    }
    // The argument is widened to `unsigned long long`, so the bits the
    // widening adds above BitsType are subtracted from the builtin's answer.
    static constexpr int kWideningBits = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;
    const int wide_zeros = __builtin_clzll(static_cast<unsigned long long>(bits));
    return wide_zeros - kWideningBits;
  }

  // Trailing-zero count; kNumBits for zero.
  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
    if (bits == 0) {
      return kNumBits;
    }
    return __builtin_ctzll(static_cast<unsigned long long>(bits));
  }
};
#elif EIGEN_COMP_MSVC
// MSVC specialization for integer types no wider than `unsigned long`,
// built on the _BitScanReverse / _BitScanForward intrinsics.
template <typename BitsType>
struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned long)>> {
  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");

  // Returns the number of zero bits above the most significant set bit of
  // `bits`, measured within BitsType's own width; kNumBits when `bits` is zero.
  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
    // Answer zero directly so the intrinsic's output is never consulted when
    // no bit is set.
    if (bits == 0) {
      return kNumBits;
    }
    unsigned long msb_index;
    _BitScanReverse(&msb_index, static_cast<unsigned long>(bits));
    // _BitScanReverse yields the *index* of the highest set bit (counted from
    // bit 0), not a zero count. The leading-zero count is therefore the
    // distance from BitsType's top bit (kNumBits - 1) down to that index.
    return (kNumBits - 1) - static_cast<int>(msb_index);
  }

  // Returns the number of zero bits below the least significant set bit of
  // `bits`; kNumBits when `bits` is zero.
  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
    if (bits == 0) {
      return kNumBits;
    }
    // The index of the lowest set bit equals the number of zeros beneath it.
    unsigned long lsb_index;
    _BitScanForward(&lsb_index, static_cast<unsigned long>(bits));
    return static_cast<int>(lsb_index);
  }
};
#ifdef _WIN64
// 64-bit MSVC specialization for integer types wider than `unsigned long` but
// no wider than `__int64`, built on the _BitScanReverse64 / _BitScanForward64
// intrinsics.
template <typename BitsType>
struct count_bits_impl<
    BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>> {
  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");

  // Returns the number of zero bits above the most significant set bit of
  // `bits`, measured within BitsType's own width; kNumBits when `bits` is zero.
  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
    // Answer zero directly so the intrinsic's output is never consulted when
    // no bit is set.
    if (bits == 0) {
      return kNumBits;
    }
    unsigned long msb_index;
    _BitScanReverse64(&msb_index, static_cast<unsigned __int64>(bits));
    // _BitScanReverse64 yields the *index* of the highest set bit (counted
    // from bit 0), not a zero count. The leading-zero count is therefore the
    // distance from BitsType's top bit (kNumBits - 1) down to that index.
    return (kNumBits - 1) - static_cast<int>(msb_index);
  }

  // Returns the number of zero bits below the least significant set bit of
  // `bits`; kNumBits when `bits` is zero.
  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
    if (bits == 0) {
      return kNumBits;
    }
    // The index of the lowest set bit equals the number of zeros beneath it.
    unsigned long lsb_index;
    _BitScanForward64(&lsb_index, static_cast<unsigned __int64>(bits));
    return static_cast<int>(lsb_index);
  }
};
#endif // _WIN64
#endif // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
template <typename Scalar>
struct random_default_impl<Scalar, false, true> {
static inline Scalar run(const Scalar& x, const Scalar& y) {

View File

@ -48,7 +48,7 @@ if(CHOLMOD_FOUND AND EIGEN_BUILD_BLAS AND EIGEN_BUILD_LAPACK)
set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ")
ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ")
@ -61,7 +61,7 @@ if(UMFPACK_FOUND AND EIGEN_BUILD_BLAS)
set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ")
ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ")
@ -74,7 +74,7 @@ if(KLU_FOUND AND EIGEN_BUILD_BLAS)
set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ")
ei_add_test(klu_support "" "${KLU_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ")
@ -87,7 +87,7 @@ if(SuperLU_FOUND AND EIGEN_BUILD_BLAS)
set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ")
ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ")
@ -171,6 +171,7 @@ endif()
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
add_custom_target(BuildOfficial)
ei_add_test(clz)
ei_add_test(rand)
ei_add_test(meta)
ei_add_test(maxsizevector)
@ -406,7 +407,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
if(EIGEN_TEST_CUDA_CLANG)
string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
@ -433,12 +434,12 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
endif()
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(gpu_example)
ei_add_test(gpu_basic)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
@ -477,7 +478,7 @@ if (EIGEN_TEST_HIP)
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
else ()
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
endif()
endif()
endif()
endif()

74
test/clz.cpp Normal file
View File

@ -0,0 +1,74 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2023 The Eigen Authors
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
// Reference leading-zero count: shifts the value left one bit at a time until
// the top bit of T is set. Yields the full bit width of T for a zero input.
template <typename T>
int ref_clz(T val) {
  constexpr int kBitWidth = sizeof(T) * CHAR_BIT;
  const T top_bit = T(1) << (kBitWidth - 1);
  int zeros = 0;
  while (zeros < kBitWidth && (val & top_bit) == 0) {
    val <<= 1;
    ++zeros;
  }
  return zeros;
}
// Reference trailing-zero count: shifts the value right one bit at a time
// until the lowest bit is set. Yields the full bit width of T for a zero input.
template <typename T>
int ref_ctz(T val) {
  constexpr int kBitWidth = sizeof(T) * CHAR_BIT;
  const T low_bit = T(1);
  int zeros = 0;
  while (zeros < kBitWidth && (val & low_bit) == 0) {
    val >>= 1;
    ++zeros;
  }
  return zeros;
}
// Sweeps evenly spaced values of T and checks Eigen::internal::clz/ctz against
// the bit-by-bit reference implementations. For 1- and 2-byte types the stride
// is 1, so every value below the maximum is visited; wider types use a stride
// of highest() / 2^16.
template <typename T>
void test_clz_ctz() {
  const T stride = sizeof(T) <= 2 ? 1 : (Eigen::NumTraits<T>::highest() / (T(1) << 16));
  const T num_samples = Eigen::NumTraits<T>::highest() / stride;
  T index = 0;
  while (index < num_samples) {
    const T val = index * stride;

    const int expected_clz = ref_clz(val);
    const int actual_clz = Eigen::internal::clz(val);
    VERIFY(expected_clz == actual_clz);

    const int expected_ctz = ref_ctz(val);
    const int actual_ctz = Eigen::internal::ctz(val);
    VERIFY(expected_ctz == actual_ctz);

    ++index;
  }
}
// Draws 1024 * 1024 values of T from Eigen::internal::random and checks
// Eigen::internal::clz/ctz against the bit-by-bit reference implementations.
template <typename T>
void test_clz_ctz_random() {
  constexpr int kNumSamples = 1024 * 1024;
  for (int sample = 0; sample < kNumSamples; ++sample) {
    const T val = Eigen::internal::random<T>();

    const int expected_clz = ref_clz(val);
    const int actual_clz = Eigen::internal::clz(val);
    VERIFY(expected_clz == actual_clz);

    const int expected_ctz = ref_ctz(val);
    const int actual_ctz = Eigen::internal::ctz(val);
    VERIFY(expected_ctz == actual_ctz);
  }
}
// Test entry point for the clz/ctz helpers: a strided sweep for each of the
// 8/16/32/64-bit unsigned types, followed by randomized checks.
EIGEN_DECLARE_TEST(clz) {
// One numbered subtest per integer width for the strided sweep.
CALL_SUBTEST_1(test_clz_ctz<uint8_t>());
CALL_SUBTEST_2(test_clz_ctz<uint16_t>());
CALL_SUBTEST_3(test_clz_ctz<uint32_t>());
CALL_SUBTEST_4(test_clz_ctz<uint64_t>());
// Randomized checks for the 32- and 64-bit types, repeated g_repeat times
// (g_repeat is defined outside this file).
// NOTE(review): these calls are not wrapped in a CALL_SUBTEST_* macro, unlike
// the sweeps above - confirm that is intended.
for (int i = 0; i < g_repeat; i++) {
test_clz_ctz_random<uint32_t>();
test_clz_ctz_random<uint64_t>();
}
}