Device-compatible Tuple implementation.

An analogue of `std::tuple` that works on device.

Context: I've tried `std::tuple` with various versions of NVCC and clang.
Although the code compiles, it often fails at run time on device, generating
"illegal memory access" or "illegal instruction" errors.
This replacement does work on device.
This commit is contained in:
Antonio Sanchez 2021-08-26 13:05:23 -07:00
parent fcd73b4884
commit 26e5beb8cb
4 changed files with 480 additions and 0 deletions

View File

@ -0,0 +1,302 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2021 The Eigen Team
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TUPLE_GPU
#define EIGEN_TUPLE_GPU
#include <type_traits>
#include <utility>
// This is a replacement of std::tuple that can be used in device code.
namespace Eigen {
namespace internal {
namespace tuple_impl {
// Internal tuple implementation.
//
// Primary template; only the specializations below are defined.
//   N     - number of stored elements.
//   Types - element types, in order.
template<size_t N, typename... Types>
class TupleImpl;

// Generic recursive tuple: stores the first element directly (`head_`) and the
// remaining N-1 elements in a nested TupleImpl (`tail_`).
template<size_t N, typename T1, typename... Ts>
class TupleImpl<N, T1, Ts...> {
 public:
  // Tuple may contain Eigen types.
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW

  // Default constructor, enabled only if all element types are
  // default-constructible. Head and tail are value-initialized.
  template<typename U1 = T1, typename EnableIf = typename std::enable_if<
      std::is_default_constructible<U1>::value
      && reduce_all<std::is_default_constructible<Ts>::value...>::value
    >::type>
  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC
  TupleImpl() : head_{}, tail_{} {}

  // Element constructor: forwards one argument to each stored element.
  template<typename U1, typename... Us,
           // Only enable if...
           typename EnableIf = typename std::enable_if<
              // the number of input arguments match, and ...
              sizeof...(Us) == sizeof...(Ts) && (
                // this does not look like a copy/move constructor.
                N > 1 || std::is_convertible<U1, T1>::value)
           >::type>
  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC
  TupleImpl(U1&& arg1, Us&&... args)
    : head_(std::forward<U1>(arg1)), tail_(std::forward<Us>(args)...) {}

  // The first stored value.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  T1& head() {
    return head_;
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  const T1& head() const {
    return head_;
  }

  // The tail values.
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  TupleImpl<N-1, Ts...>& tail() {
    return tail_;
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  const TupleImpl<N-1, Ts...>& tail() const {
    return tail_;
  }

  // Swaps the head and the tail with those of `other`.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void swap(TupleImpl& other) {
    using numext::swap;
    swap(head_, other.head_);
    swap(tail_, other.tail_);
  }

  // Converting copy-assignment from a tuple with the same number of elements:
  // assigns the head, then the tail.
  template<typename... UTypes>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TupleImpl& operator=(const TupleImpl<N, UTypes...>& other) {
    head_ = other.head_;
    tail_ = other.tail_;
    return *this;
  }

  // Converting move-assignment from a tuple with the same number of elements.
  //
  // The head is forwarded according to the *declared* element type of `other`:
  // an element stored by value is moved from, but an element stored as an
  // lvalue reference (e.g. one produced by `tie`) is only copied from. An
  // unconditional std::move here would move out of the referenced object, so
  // that `tie(a, b) = tie(c, d)` would leave `c` and `d` in a moved-from state.
  template<typename... UTypes>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  TupleImpl& operator=(TupleImpl<N, UTypes...>&& other) {
    head_ = std::forward<decltype(other.head_)>(other.head_);
    // The tail is itself a TupleImpl held by value, so it is safe to move; the
    // nested assignment applies the same per-element forwarding.
    tail_ = std::move(other.tail_);
    return *this;
  }

 private:
  // Allow related tuples to reference head_/tail_.
  template<size_t M, typename... UTypes>
  friend class TupleImpl;

  T1 head_;
  TupleImpl<N-1, Ts...> tail_;
};

// Empty tuple specialization.
template<>
class TupleImpl<size_t(0)> {};
// Type trait: derives from std::true_type when the argument is a TupleImpl
// whose size parameter equals its number of element types, and from
// std::false_type for every other type.
template<typename T>
struct is_tuple : std::false_type {};

template<typename... Ts>
struct is_tuple< TupleImpl<sizeof...(Ts), Ts...> > : std::true_type {};
// Gets an element from a tuple.
//
// Recursive case: element Idx of <T1, Ts...> is element Idx-1 of <Ts...>, so
// the head is skipped and the lookup continues in the tail.
template<size_t Idx, typename T1, typename... Ts>
struct tuple_get_impl {
  using TupleType = TupleImpl<sizeof...(Ts) + 1, T1, Ts...>;
  using ReturnType = typename tuple_get_impl<Idx - 1, Ts...>::ReturnType;

  // Mutable access.
  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  ReturnType& run(TupleType& tuple) {
    using Next = tuple_get_impl<Idx - 1, Ts...>;
    return Next::run(tuple.tail());
  }

  // Read-only access.
  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  const ReturnType& run(const TupleType& tuple) {
    using Next = tuple_get_impl<Idx - 1, Ts...>;
    return Next::run(tuple.tail());
  }
};
// Base case (index 0): the requested element is the head of the tuple.
template<typename T1, typename... Ts>
struct tuple_get_impl<0, T1, Ts...> {
  using TupleType = TupleImpl<sizeof...(Ts) + 1, T1, Ts...>;
  using ReturnType = T1;

  // Mutable access to the head.
  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  ReturnType& run(TupleType& tuple) {
    return tuple.head();
  }

  // Read-only access to the head.
  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  const ReturnType& run(const TupleType& tuple) {
    return tuple.head();
  }
};
// Concatenates N Tuples.
//
// NTuples is the number of tuples still to be concatenated; Tuples are their
// types.
template<size_t NTuples, typename... Tuples>
struct tuple_cat_impl;

// Recursive case (at least two tuples): merge the first two tuples into a
// single one, then recurse with one tuple fewer.
template<size_t NTuples, size_t N1, typename... Args1, size_t N2, typename... Args2, typename... Tuples>
struct tuple_cat_impl<NTuples, TupleImpl<N1, Args1...>, TupleImpl<N2, Args2...>, Tuples...> {
  using TupleType1 = TupleImpl<N1, Args1...>;
  using TupleType2 = TupleImpl<N2, Args2...>;
  using MergedTupleType = TupleImpl<N1 + N2, Args1..., Args2...>;

  using ReturnType = typename tuple_cat_impl<NTuples-1, MergedTupleType, Tuples...>::ReturnType;

  // Uses the index sequences to pull every element out of tuple1 and tuple2
  // into a merged tuple, then hands that merged tuple plus the remaining
  // tuples to the next recursion step.
  template<typename Lhs, size_t... I1s, typename Rhs, size_t... I2s, typename... Rest>
  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  ReturnType run(Lhs&& tuple1, index_sequence<I1s...>,
                 Rhs&& tuple2, index_sequence<I2s...>,
                 Rest&&... tuples) {
    using Next = tuple_cat_impl<NTuples-1, MergedTupleType, Tuples...>;
    return Next::run(
        MergedTupleType(tuple_get_impl<I1s, Args1...>::run(std::forward<Lhs>(tuple1))...,
                        tuple_get_impl<I2s, Args2...>::run(std::forward<Rhs>(tuple2))...),
        std::forward<Rest>(tuples)...);
  }

  // Entry point: concatenates the first two tuples by generating the index
  // sequences 0..N1-1 and 0..N2-1 and delegating to the overload above.
  template<typename Lhs, typename Rhs, typename... Rest>
  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  ReturnType run(Lhs&& tuple1, Rhs&& tuple2, Rest&&... tuples) {
    return run(std::forward<Lhs>(tuple1), make_index_sequence<N1>{},
               std::forward<Rhs>(tuple2), make_index_sequence<N2>{},
               std::forward<Rest>(tuples)...);
  }
};
// Base case with a single tuple: the result is that tuple itself.
template<size_t N, typename... Args>
struct tuple_cat_impl<1, TupleImpl<N, Args...> > {
  using ReturnType = TupleImpl<N, Args...>;

  // Returns `tuple1` by value.
  //
  // The argument is forwarded so that an rvalue tuple is moved into the result
  // instead of being copied; an lvalue argument is still copied.
  template<typename Tuple1>
  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  ReturnType run(Tuple1&& tuple1) {
    return std::forward<Tuple1>(tuple1);
  }
};
// Special case of no tuples: the concatenation is the empty tuple.
template<>
struct tuple_cat_impl<0> {
  using ReturnType = TupleImpl<0>;

  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  ReturnType run() {
    return ReturnType{};
  }
};
// Helper for make_tuple: maps std::reference_wrapper<T> to T& and leaves every
// other type unchanged.
template <typename T>
struct unwrap_reference_wrapper {
  using type = T;
};

template <typename Referent>
struct unwrap_reference_wrapper<std::reference_wrapper<Referent> > {
  using type = Referent&;
};

// Helper for make_tuple: applies std::decay to T first, then unwraps the
// result if it is a std::reference_wrapper.
template <typename T>
struct unwrap_decay {
  using type =
      typename unwrap_reference_wrapper<
          typename std::decay<T>::type
      >::type;
};
/**
 * Alternative to std::tuple that can be used on device.
 *
 * Alias of the internal TupleImpl with the element count filled in from the
 * number of element types.
 */
template<typename... Ts>
using tuple = TupleImpl<sizeof...(Ts), Ts...>;
/**
 * Utility for determining a tuple's size.
 *
 * Only defined for `tuple<Types...>`; `value` is the number of element types.
 */
template<typename Tuple>
struct tuple_size;

template<typename... Ts>
struct tuple_size< tuple<Ts...> >
    : std::integral_constant<size_t, sizeof...(Ts)> {};
/**
 * Gets an element of a tuple (read-only overload).
 * \tparam Idx index of the element.
 * \tparam Types ... tuple element types.
 * \param tuple the tuple.
 * \return a const reference to the desired element.
 */
template<size_t Idx, typename... Types>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const typename tuple_get_impl<Idx, Types...>::ReturnType&
get(const tuple<Types...>& tuple) {
  using Getter = tuple_get_impl<Idx, Types...>;
  return Getter::run(tuple);
}

/**
 * Gets an element of a tuple (mutable overload).
 * \tparam Idx index of the element.
 * \tparam Types ... tuple element types.
 * \param tuple the tuple.
 * \return a reference to the desired element.
 */
template<size_t Idx, typename... Types>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename tuple_get_impl<Idx, Types...>::ReturnType&
get(tuple<Types...>& tuple) {
  using Getter = tuple_get_impl<Idx, Types...>;
  return Getter::run(tuple);
}
/**
 * Concatenate multiple tuples.
 *
 * Only participates in overload resolution when every argument, after
 * std::decay, is a tuple.
 * \param tuples ... list of tuples.
 * \return concatenated tuple.
 */
template<typename... Tuples,
         typename EnableIf = typename std::enable_if<
           internal::reduce_all<
             is_tuple<typename std::decay<Tuples>::type>::value...>::value>::type>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>::ReturnType
tuple_cat(Tuples&&... tuples) {
  using Impl = tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>;
  return Impl::run(std::forward<Tuples>(tuples)...);
}
/**
 * Tie arguments together into a tuple.
 *
 * Every element of the result is an lvalue reference to the corresponding
 * argument.
 * \param args ... objects to reference.
 * \return tuple of references to `args`.
 */
template <typename... Args, typename ReturnType = tuple<Args&...> >
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
ReturnType tie(Args&... args) EIGEN_NOEXCEPT {
  return ReturnType{args...};
}
/**
 * Create a tuple of values from the supplied arguments.
 *
 * Each element type is the decayed argument type, except that an argument of
 * type std::reference_wrapper<T> produces an element of type T&.
 * \param args ... values used to initialize the elements.
 * \return the new tuple.
 */
template <typename... Args, typename ReturnType = tuple<typename unwrap_decay<Args>::type...> >
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
ReturnType make_tuple(Args&&... args) {
  return ReturnType{std::forward<Args>(args)...};
}
/**
 * Forward a set of arguments as a tuple.
 *
 * The element types are the deduced `Args`: an lvalue argument is stored as an
 * lvalue reference, while an rvalue argument is stored by value.
 * \param args ... arguments to forward.
 * \return tuple built from the forwarded arguments.
 */
template <typename... Args>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
tuple<Args...> forward_as_tuple(Args&&... args) {
  using Result = tuple<Args...>;
  return Result(std::forward<Args>(args)...);
}
} // namespace tuple_impl
} // namespace internal
} // namespace Eigen
#endif // EIGEN_TUPLE_GPU

View File

@ -648,6 +648,60 @@ struct invoke_result<F, ArgType0, ArgType1, void> {
};
#endif
// C++14 integer/index_sequence.
//
// The standard library versions are used when the library advertises them and
// EIGEN_MAX_CPP_VER allows C++14; otherwise minimal replacements are defined.
#if defined(__cpp_lib_integer_sequence) && __cpp_lib_integer_sequence >= 201304L && EIGEN_MAX_CPP_VER >= 14
using std::integer_sequence;
using std::make_integer_sequence;
using std::index_sequence;
using std::make_index_sequence;
#else

// Compile-time sequence of integers of type T.
template <typename T, T... Ints>
struct integer_sequence {
  // Element type. Mirrors std::integer_sequence::value_type so that code
  // written against one branch of this #if also compiles against the other.
  using value_type = T;

  // Number of integers in the sequence.
  static EIGEN_CONSTEXPR size_t size() EIGEN_NOEXCEPT { return sizeof...(Ints); }
};

// Appends the integer N to the end of an integer_sequence.
template <typename T, typename Sequence, T N>
struct append_integer;

template<typename T, T... Ints, T N>
struct append_integer<T, integer_sequence<T, Ints...>, N> {
  using type = integer_sequence<T, Ints..., N>;
};

// Builds integer_sequence<T, 0, 1, ..., N-1> by appending N-1 to the sequence
// of length N-1.
template<typename T, size_t N>
struct generate_integer_sequence {
  using type = typename append_integer<T, typename generate_integer_sequence<T, N-1>::type, N-1>::type;
};

// Recursion end: the sequence of length 0 is empty.
template<typename T>
struct generate_integer_sequence<T, 0> {
  using type = integer_sequence<T>;
};

template <typename T, size_t N>
using make_integer_sequence = typename generate_integer_sequence<T, N>::type;

template<size_t... Ints>
using index_sequence = integer_sequence<size_t, Ints...>;

template<size_t N>
using make_index_sequence = make_integer_sequence<size_t, N>;

#endif
// Reduces a sequence of bools to true if all are true, false otherwise.
//
// Appending `true` and prepending `true` produce the same sequence type only
// when every value in between is `true`.
template<bool... conditions>
using reduce_all = std::is_same<
    integer_sequence<bool, conditions..., true>,
    integer_sequence<bool, true, conditions...> >;

// Reduces a sequence of bools to true if any are true, false if all false.
//
// Same trick with `false`: the two sequence types coincide only when every
// value is `false`, so the comparison is negated.
template<bool... conditions>
using reduce_any = std::integral_constant<
    bool,
    !std::is_same<
        integer_sequence<bool, conditions..., false>,
        integer_sequence<bool, false, conditions...> >::value>;
// Tag type holding a single char.
struct meta_yes {
  char a[1];
};

// Tag type holding two chars, so its size differs from that of meta_yes.
struct meta_no {
  char a[2];
};

View File

@ -289,6 +289,7 @@ ei_add_test(random_matrix)
ei_add_test(initializer_list_construction)
ei_add_test(diagonal_matrix_variadic_ctor)
ei_add_test(serializer)
# Test for the device-compatible tuple; its source is test/tuple_test.cpp.
ei_add_test(tuple_test)
add_executable(bug1213 bug1213.cpp bug1213_main.cpp)

123
test/tuple_test.cpp Normal file
View File

@ -0,0 +1,123 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2021 The Eigen Team
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/Core>
#include <Eigen/src/Core/arch/GPU/Tuple.h>
using namespace Eigen::internal;
using Eigen::internal::tuple_impl::tuple;
// Exercises construction, copying, element access, size queries,
// concatenation, the factory helpers and assignment on tuples of scalars.
void basic_tuple_test() {
  // Construction from elements.
  tuple<> tuple0 {};
  tuple<int> tuple1 {1};
  tuple<int, float> tuple2 {3, 5.0f};
  tuple<int, float, double> tuple3 {7, 11.0f, 13.0};

  // Default construction.
  tuple<> tuple0default;
  EIGEN_UNUSED_VARIABLE(tuple0default)
  tuple<int> tuple1default;
  EIGEN_UNUSED_VARIABLE(tuple1default)
  tuple<int, float> tuple2default;
  EIGEN_UNUSED_VARIABLE(tuple2default)
  tuple<int, float, double> tuple3default;
  EIGEN_UNUSED_VARIABLE(tuple3default)

  // Copy construction.
  tuple<> tuple0b = tuple0;
  EIGEN_UNUSED_VARIABLE(tuple0b)
  decltype(tuple1) tuple1b = tuple1;
  EIGEN_UNUSED_VARIABLE(tuple1b)
  decltype(tuple2) tuple2b = tuple2;
  EIGEN_UNUSED_VARIABLE(tuple2b)
  decltype(tuple3) tuple3b = tuple3;
  EIGEN_UNUSED_VARIABLE(tuple3b)

  // get.
  VERIFY_IS_EQUAL(tuple_impl::get<0>(tuple3), 7);
  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple3), 11.0f);
  VERIFY_IS_EQUAL(tuple_impl::get<2>(tuple3), 13.0);

  // tuple_impl::tuple_size.
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple0)>::value, 0);
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple1)>::value, 1);
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple2)>::value, 2);
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple3)>::value, 3);

  // tuple_impl::tuple_cat: two tuples, an empty operand, a single tuple,
  // no tuples at all, and four tuples at once.
  auto tuple2cat3 = tuple_impl::tuple_cat(tuple2, tuple3);
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple2cat3)>::value, 5);
  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple2cat3), 5.0f);
  VERIFY_IS_EQUAL(tuple_impl::get<3>(tuple2cat3), 11.0f);
  auto tuple3cat0 = tuple_impl::tuple_cat(tuple3, tuple0);
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple3cat0)>::value, 3);
  auto singlecat = tuple_impl::tuple_cat(tuple3);
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(singlecat)>::value, 3);
  auto emptycat = tuple_impl::tuple_cat();
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(emptycat)>::value, 0);
  auto tuple0cat1cat2cat3 = tuple_impl::tuple_cat(tuple0, tuple1, tuple2, tuple3);
  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple0cat1cat2cat3)>::value, 6);

  // make_tuple.
  // The tuple types should use values for the second and fourth parameters,
  // even though lvalues are passed in.
  double lvalue_arg = 20;
  auto tuple_make = tuple_impl::make_tuple(int(10), lvalue_arg, float(20.0f), tuple0);
  VERIFY( (std::is_same<decltype(tuple_make), tuple<int, double, float, tuple<> > >::value) );
  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple_make), lvalue_arg);

  // forward_as_tuple.
  // The tuple types should use references for the second and fourth parameters.
  auto tuple_forward = tuple_impl::forward_as_tuple(int(10), lvalue_arg, float(20.0f), tuple0);
  VERIFY( (std::is_same<decltype(tuple_forward), tuple<int, double&, float, tuple<>& > >::value) );
  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple_forward), lvalue_arg);

  // tie.
  auto tuple_tie = tuple_impl::tie(tuple0, tuple1, tuple2, tuple3);
  VERIFY( (std::is_same<decltype(tuple_tie),
                        tuple<decltype(tuple0)&,
                              decltype(tuple1)&,
                              decltype(tuple2)&,
                              decltype(tuple3)&> >::value) );
  VERIFY_IS_EQUAL( (tuple_impl::get<1>(tuple_impl::get<2>(tuple_tie))), 5.0 );
  // Modify value through the tie and ensure tuple2 is updated.
  tuple_impl::get<1>(tuple_impl::get<2>(tuple_tie)) = 10.0;
  VERIFY_IS_EQUAL( (tuple_impl::get<1>(tuple2)), 10.0 );

  // Assignment into a tuple of references writes through to the variables.
  int out_i = -1;
  float out_f = -1;
  double out_d = -1;
  tuple_impl::tie(out_i, out_f, out_d) = tuple3;
  VERIFY_IS_EQUAL(out_i, tuple_impl::get<0>(tuple3));
  VERIFY_IS_EQUAL(out_f, tuple_impl::get<1>(tuple3));
  VERIFY_IS_EQUAL(out_d, tuple_impl::get<2>(tuple3));

  // Move assignment from a copy of tuple3.
  tuple<int, float, double> tuple3c(-2, -2, -2);
  tuple3c = std::move(tuple3b);
  VERIFY_IS_EQUAL(tuple_impl::get<0>(tuple3c), tuple_impl::get<0>(tuple3));
  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple3c), tuple_impl::get<1>(tuple3));
  VERIFY_IS_EQUAL(tuple_impl::get<2>(tuple3c), tuple_impl::get<2>(tuple3));
}
void eigen_tuple_test() {
tuple<Eigen::Matrix3d, Eigen::MatrixXd> tuple;
tuple_impl::get<0>(tuple).setRandom();
tuple_impl::get<1>(tuple).setRandom(10, 10);
auto tuple_tie = tuple_impl::tie(tuple_impl::get<0>(tuple), tuple_impl::get<1>(tuple));
tuple_impl::get<1>(tuple_tie).setIdentity();
VERIFY(tuple_impl::get<1>(tuple).isIdentity());
}
// Test entry point: runs the scalar tuple checks, then the Eigen-matrix ones.
EIGEN_DECLARE_TEST(tuple)
{
  CALL_SUBTEST(basic_tuple_test());
  CALL_SUBTEST(eigen_tuple_test());
}