Device-compatible Tuple implementation.

An analogue of `std::tuple` that works in device (GPU) code. Context: I've tried `std::tuple` with various versions of NVCC and clang, and although the code compiles, it often fails at run time, producing "illegal memory access" or "illegal instruction" errors. This replacement does work on device.
2025-07-16 01:51:51 +08:00 · 2021-08-26 13:05:23 -07:00 · 2021-08-26 13:05:23 -07:00 · 26e5beb8cb
commit 26e5beb8cb
parent fcd73b4884
4 changed files with 480 additions and 0 deletions
--- a/Eigen/src/Core/arch/GPU/Tuple.h
+++ b/Eigen/src/Core/arch/GPU/Tuple.h
@ -0,0 +1,302 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2021 The Eigen Team
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_TUPLE_GPU
+#define EIGEN_TUPLE_GPU
+
+#include <type_traits>
+#include <utility>
+
+// This is a replacement of std::tuple that can be used in device code.
+
+namespace Eigen {
+namespace internal {
+namespace tuple_impl {
+
+// Internal tuple implementation: N is the number of elements, Types... are the element types.
+template<size_t N, typename... Types>
+class TupleImpl;
+
+// Generic recursive tuple: stores the first element (head_) and a TupleImpl<N-1, Ts...> holding the remaining ones (tail_).
+template<size_t N, typename T1, typename... Ts>
+class TupleImpl<N, T1, Ts...> {
+ public:
+  // Tuple may contain Eigen types.
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
+  
+  // Default constructor, enabled only if T1 and every type in Ts... are default-constructible.
+  template<typename U1 = T1, typename EnableIf = typename std::enable_if<
+      std::is_default_constructible<U1>::value
+      && reduce_all<std::is_default_constructible<Ts>::value...>::value
+    >::type>
+  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC
+  TupleImpl() : head_{}, tail_{} {}
+ 
+  // Element constructor: forwards arg1 into head_ and the remaining args into tail_.
+  template<typename U1, typename... Us, 
+           // Only enable if...
+           typename EnableIf = typename std::enable_if<
+              // the number of input arguments match, and ...
+              sizeof...(Us) == sizeof...(Ts) && (
+                // this does not look like a copy/move constructor.
+                N > 1 || std::is_convertible<U1, T1>::value)
+           >::type>
+  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC
+  TupleImpl(U1&& arg1, Us&&... args) 
+    : head_(std::forward<U1>(arg1)), tail_(std::forward<Us>(args)...) {}
+ 
+  // The first stored value (mutable access).
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  T1& head() {
+    return head_;
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  const T1& head() const {  // Read-only access to the first stored value.
+    return head_;
+  }
+  
+  // The tail values, i.e. a tuple of every element except the first (mutable access).
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  TupleImpl<N-1, Ts...>& tail() {
+    return tail_;
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  const TupleImpl<N-1, Ts...>& tail() const {  // Read-only access to the tail values.
+    return tail_;
+  }
+  
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  void swap(TupleImpl& other) {  // Exchanges the head and the tail of this tuple with those of `other`.
+    using numext::swap;
+    swap(head_, other.head_);
+    swap(tail_, other.tail_);
+  }
+  
+  template<typename... UTypes>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  TupleImpl& operator=(const TupleImpl<N, UTypes...>& other) {  // Copy-assigns head and tail from a same-length tuple whose element types may differ.
+    head_ = other.head_;
+    tail_ = other.tail_;
+    return *this;
+  }
+  
+  template<typename... UTypes>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  TupleImpl& operator=(TupleImpl<N, UTypes...>&& other) {  // Move-assigns head and tail from a same-length tuple whose element types may differ.
+    head_ = std::move(other.head_);
+    tail_ = std::move(other.tail_);
+    return *this;
+  }
+  
+ private:
+  // Allow related tuples (other sizes / element types) to reference head_/tail_.
+  template<size_t M, typename... UTypes>
+  friend class TupleImpl;
+ 
+  T1 head_;
+  TupleImpl<N-1, Ts...> tail_;
+};
+
+// Empty tuple specialization: holds no elements and terminates the head/tail recursion.
+template<>
+class TupleImpl<size_t(0)> {};
+
+// Type trait: is_tuple<T>::value is true when T is a TupleImpl whose size
+// argument equals its number of element types, and false for any other type.
+template<typename T>
+struct is_tuple : std::false_type {};
+
+template<typename... Elements>
+struct is_tuple< TupleImpl<sizeof...(Elements), Elements...> > : std::true_type {};
+
+// Gets an element from a tuple. Recursive case: drops the head type and looks
+// up element Idx-1 in the tail.
+template<size_t Idx, typename Head, typename... Rest>
+struct tuple_get_impl {
+ private:
+  // Accessor for the same element, expressed relative to the tail.
+  using Next = tuple_get_impl<Idx - 1, Rest...>;
+
+ public:
+  using TupleType = TupleImpl<sizeof...(Rest) + 1, Head, Rest...>;
+  using ReturnType = typename Next::ReturnType;
+
+  // Mutable access.
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  ReturnType& run(TupleType& tuple) {
+    return Next::run(tuple.tail());
+  }
+
+  // Read-only access.
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  const ReturnType& run(const TupleType& tuple) {
+    return Next::run(tuple.tail());
+  }
+};
+
+// Recursion terminator: index 0 designates the head element of the tuple.
+template<typename Head, typename... Rest>
+struct tuple_get_impl<0, Head, Rest...> {
+  using TupleType = TupleImpl<sizeof...(Rest) + 1, Head, Rest...>;
+  using ReturnType = Head;
+
+  // Mutable access to the head.
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  ReturnType& run(TupleType& tuple) {
+    return tuple.head();
+  }
+
+  // Read-only access to the head.
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  const ReturnType& run(const TupleType& tuple) {
+    return tuple.head();
+  }
+};
+
+// Concatenates N Tuples: NTuples is the number of tuples still to merge, Tuples... are their types.
+template<size_t NTuples, typename... Tuples>
+struct tuple_cat_impl;
+
+template<size_t NTuples, size_t N1, typename... Args1, size_t N2, typename... Args2, typename... Tuples>
+struct tuple_cat_impl<NTuples, TupleImpl<N1, Args1...>, TupleImpl<N2, Args2...>, Tuples...> {  // General case: at least two tuples remain.
+  using TupleType1 = TupleImpl<N1, Args1...>;
+  using TupleType2 = TupleImpl<N2, Args2...>;
+  using MergedTupleType = TupleImpl<N1 + N2, Args1..., Args2...>;  // The first two tuples fused into one.
+  
+  using ReturnType = typename tuple_cat_impl<NTuples-1, MergedTupleType, Tuples...>::ReturnType;  // Type after the remaining tuples are appended too.
+  
+  // Uses the index sequences to extract the elements of tuple1 and tuple2 into one merged tuple,
+  // then recurses with the merged tuple taking the place of the first two.
+  template<typename Tuple1, size_t... I1s, typename Tuple2, size_t... I2s, typename... MoreTuples>
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  ReturnType run(Tuple1&& tuple1, index_sequence<I1s...>,
+                 Tuple2&& tuple2, index_sequence<I2s...>,
+                 MoreTuples&&... tuples) {
+    return tuple_cat_impl<NTuples-1, MergedTupleType, Tuples...>::run(
+        MergedTupleType(tuple_get_impl<I1s, Args1...>::run(std::forward<Tuple1>(tuple1))...,
+                        tuple_get_impl<I2s, Args2...>::run(std::forward<Tuple2>(tuple2))...),
+        std::forward<MoreTuples>(tuples)...);
+  }
+  
+  // Concatenates the first two tuples: builds their index sequences and dispatches to the overload above.
+  template<typename Tuple1, typename Tuple2, typename... MoreTuples>
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  ReturnType run(Tuple1&& tuple1, Tuple2&& tuple2, MoreTuples&&... tuples) {
+    return run(std::forward<Tuple1>(tuple1), make_index_sequence<N1>{},
+               std::forward<Tuple2>(tuple2), make_index_sequence<N2>{},
+               std::forward<MoreTuples>(tuples)...);
+  }
+};
+
+// Base case with a single tuple: the concatenation is that tuple itself.
+template<size_t N, typename... Args>
+struct tuple_cat_impl<1, TupleImpl<N, Args...> > { 
+  using ReturnType = TupleImpl<N, Args...>;
+  
+  // Returns the tuple by value. The argument is perfectly forwarded so that an
+  // rvalue (such as the temporary merged tuple handed over by the recursive
+  // case) is moved into the result rather than copied; a named rvalue-reference
+  // parameter is an lvalue expression and would otherwise always be copied.
+  // An lvalue argument is still copied, as before.
+  template<typename Tuple1>
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  ReturnType run(Tuple1&& tuple1) {
+    return std::forward<Tuple1>(tuple1);
+  }
+};
+
+// Degenerate case: concatenating zero tuples produces the empty tuple.
+template<>
+struct tuple_cat_impl<0> {
+  using ReturnType = TupleImpl<0>;
+
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  ReturnType run() {
+    return ReturnType{};
+  }
+};
+
+// Helper for make_tuple: maps std::reference_wrapper<U> to U& and leaves every
+// other type unchanged.
+template <typename Plain>
+struct unwrap_reference_wrapper { using type = Plain; };
+
+template <typename Referred>
+struct unwrap_reference_wrapper<std::reference_wrapper<Referred> > { using type = Referred&; };
+
+// Helper for make_tuple: first decays T, then unwraps a reference_wrapper.
+// The resulting `type` member is inherited from unwrap_reference_wrapper.
+template <typename T>
+struct unwrap_decay
+    : unwrap_reference_wrapper<typename std::decay<T>::type> {};
+
+/**
+ * Alternative to std::tuple that can be used on device; an alias of TupleImpl<sizeof...(Types), Types...>.
+ */
+template<typename... Types>
+using tuple = TupleImpl<sizeof...(Types), Types...>;
+
+/**
+ * Utility for determining a tuple's size: tuple_size< tuple<Types...> >::value equals sizeof...(Types).
+ */
+template<typename Tuple>
+struct tuple_size;  // Declared only; the specialization for tuple<Types...> below supplies the definition.
+
+template<typename... Types >
+struct tuple_size< tuple<Types...> > : std::integral_constant<size_t, sizeof...(Types)> {};
+
+/**
+ * Read-only access to one element of a tuple.
+ * \tparam Idx zero-based index of the requested element.
+ * \tparam Types ... tuple element types.
+ * \param tuple the tuple to read from.
+ * \return a const reference to the element at position Idx.
+ */
+template<size_t Idx, typename... Types>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+const typename tuple_get_impl<Idx, Types...>::ReturnType&
+get(const tuple<Types...>& tuple) {
+  using Getter = tuple_get_impl<Idx, Types...>;
+  return Getter::run(tuple);
+}
+
+// Mutable overload: returns a non-const reference to the element at position
+// Idx of a non-const tuple.
+template<size_t Idx, typename... Types>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+typename tuple_get_impl<Idx, Types...>::ReturnType&
+get(tuple<Types...>& tuple) {
+  using Getter = tuple_get_impl<Idx, Types...>;
+  return Getter::run(tuple);
+}
+
+/**
+ * Concatenate multiple tuples into a single one, preserving element order.
+ * Only participates in overload resolution when every (decayed) argument
+ * type is a tuple.
+ * \param tuples ... list of tuples.
+ * \return concatenated tuple.
+ */
+template<typename... Tuples,
+          typename EnableIf = typename std::enable_if<
+            internal::reduce_all<
+              is_tuple<typename std::decay<Tuples>::type>::value...>::value>::type>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+typename tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>::ReturnType
+tuple_cat(Tuples&&... tuples) {
+  using Impl = tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>;
+  return Impl::run(std::forward<Tuples>(tuples)...);
+}
+
+/**
+ * Tie arguments together into a tuple of lvalue references, so that reading
+ * from or assigning to the tuple's elements accesses the original objects.
+ */
+template <typename... Args, typename ReturnType = tuple<Args&...> >
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ReturnType tie(Args&... args) EIGEN_NOEXCEPT {
+  return ReturnType{args...};
+}
+
+/**
+ * Create a tuple holding decayed copies of the supplied arguments (a std::reference_wrapper<T> argument is stored as T&).
+ */
+template <typename... Args, typename ReturnType = tuple<typename unwrap_decay<Args>::type...> >
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ReturnType make_tuple(Args&&... args) {
+  return ReturnType{std::forward<Args>(args)...};
+}
+
+/**
+ * Forward a set of arguments as a tuple: lvalue arguments are stored as lvalue references, rvalue arguments are stored by value.
+ */
+template <typename... Args>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+tuple<Args...> forward_as_tuple(Args&&... args) {
+  return tuple<Args...>(std::forward<Args>(args)...);
+}
+
+}  // namespace tuple_impl
+}  // namespace internal
+}  // namespace Eigen
+
+#endif  // EIGEN_TUPLE_GPU
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@ -648,6 +648,60 @@ struct invoke_result<F, ArgType0, ArgType1, void> {
 };
 #endif

+// C++14 integer/index_sequence.
+#if defined(__cpp_lib_integer_sequence) && __cpp_lib_integer_sequence >= 201304L && EIGEN_MAX_CPP_VER >= 14
+
+using std::integer_sequence;
+using std::make_integer_sequence;
+
+using std::index_sequence;
+using std::make_index_sequence;
+
+#else 
+
+// Fallback for std::integer_sequence: a compile-time list of values of type T.
+template <typename T, T... Ints>
+struct integer_sequence {
+  // Element type. std::integer_sequence exposes the same member, so code
+  // written against one implementation also compiles against the other.
+  using value_type = T;
+
+  // Number of values held by the sequence.
+  static EIGEN_CONSTEXPR size_t size() EIGEN_NOEXCEPT { return sizeof...(Ints); }
+};
+
+// Appends the value N to the end of an integer_sequence; the result is
+// exposed as the nested `type`.
+template <typename T, typename Sequence, T N>
+struct append_integer;
+
+template<typename T, T... Values, T Last>
+struct append_integer<T, integer_sequence<T, Values...>, Last> {
+  using type = integer_sequence<T, Values..., Last>;
+};
+
+// Builds integer_sequence<T, 0, 1, ..., N-1> by appending N-1 to the sequence
+// of length N-1.
+template<typename T, size_t N>
+struct generate_integer_sequence {
+ private:
+  // The sequence 0, ..., N-2.
+  using shorter = typename generate_integer_sequence<T, N-1>::type;
+
+ public:
+  using type = typename append_integer<T, shorter, N-1>::type;
+};
+
+// Recursion terminator: the sequence of length zero is empty.
+template<typename T>
+struct generate_integer_sequence<T, 0> {
+  using type = integer_sequence<T>;
+};
+
+// integer_sequence<T, 0, 1, ..., N-1>.
+template <typename T, size_t N>
+using make_integer_sequence = typename generate_integer_sequence<T, N>::type;
+
+// An integer_sequence whose values are of type size_t.
+template<size_t... Indices>
+using index_sequence = integer_sequence<size_t, Indices...>;
+
+// index_sequence<0, 1, ..., N-1>.
+template<size_t N>
+using make_index_sequence = typename generate_integer_sequence<size_t, N>::type;
+
+#endif
+
+// Logical AND over a pack of bools. The pack is compared with itself shifted
+// by one position (a `true` appended vs. a `true` prepended); the two
+// sequences have the same type exactly when every value is true.
+template<bool... Bs>
+using reduce_all = std::is_same<integer_sequence<bool, Bs..., true>, integer_sequence<bool, true, Bs...> >;
+
+// Logical OR over a pack of bools. Uses the same shifted-sequence comparison
+// as an all-false test (a `false` appended vs. a `false` prepended) and
+// negates the outcome.
+template<bool... Bs>
+using reduce_any = std::integral_constant<bool,
+    !std::is_same<integer_sequence<bool, Bs..., false>, integer_sequence<bool, false, Bs...> >::value>;
+
 struct meta_yes { char a[1]; };
 struct meta_no  { char a[2]; };

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -289,6 +289,7 @@ ei_add_test(random_matrix)
 ei_add_test(initializer_list_construction)
 ei_add_test(diagonal_matrix_variadic_ctor)
 ei_add_test(serializer)
+ei_add_test(tuple_test)

 add_executable(bug1213 bug1213.cpp bug1213_main.cpp)

--- a/test/tuple_test.cpp
+++ b/test/tuple_test.cpp
@ -0,0 +1,123 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2021 The Eigen Team
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/Core>
+#include <Eigen/src/Core/arch/GPU/Tuple.h>
+
+using namespace Eigen::internal;
+using Eigen::internal::tuple_impl::tuple;
+  
+void basic_tuple_test() {  
+  // Construction.
+  tuple<> tuple0 {};
+  tuple<int> tuple1 {1};
+  tuple<int, float> tuple2 {3, 5.0f};
+  tuple<int, float, double> tuple3 {7, 11.0f, 13.0};
+  // Default construction.
+  tuple<> tuple0default;
+  EIGEN_UNUSED_VARIABLE(tuple0default)
+  tuple<int> tuple1default;
+  EIGEN_UNUSED_VARIABLE(tuple1default)
+  tuple<int, float> tuple2default;
+  EIGEN_UNUSED_VARIABLE(tuple2default)
+  tuple<int, float, double> tuple3default;
+  EIGEN_UNUSED_VARIABLE(tuple3default)
+  
+  // Copy construction.
+  tuple<> tuple0b = tuple0;
+  EIGEN_UNUSED_VARIABLE(tuple0b)
+  decltype(tuple1) tuple1b = tuple1;
+  EIGEN_UNUSED_VARIABLE(tuple1b)
+  decltype(tuple2) tuple2b = tuple2;
+  EIGEN_UNUSED_VARIABLE(tuple2b)
+  decltype(tuple3) tuple3b = tuple3;
+  EIGEN_UNUSED_VARIABLE(tuple3b)
+  
+  // get.
+  VERIFY_IS_EQUAL(tuple_impl::get<0>(tuple3), 7);
+  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple3), 11.0f);
+  VERIFY_IS_EQUAL(tuple_impl::get<2>(tuple3), 13.0);
+  
+  // tuple_impl::tuple_size.
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple0)>::value, 0);
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple1)>::value, 1);
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple2)>::value, 2);
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple3)>::value, 3);
+  
+  // tuple_impl::tuple_cat.
+  auto tuple2cat3 = tuple_impl::tuple_cat(tuple2, tuple3);
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple2cat3)>::value, 5);
+  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple2cat3), 5.0f);
+  VERIFY_IS_EQUAL(tuple_impl::get<3>(tuple2cat3), 11.0f);
+  auto tuple3cat0 = tuple_impl::tuple_cat(tuple3, tuple0);
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple3cat0)>::value, 3);
+  auto singlecat = tuple_impl::tuple_cat(tuple3);
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(singlecat)>::value, 3);
+  auto emptycat = tuple_impl::tuple_cat();
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(emptycat)>::value, 0);
+  auto tuple0cat1cat2cat3 = tuple_impl::tuple_cat(tuple0, tuple1, tuple2, tuple3);
+  VERIFY_IS_EQUAL(tuple_impl::tuple_size<decltype(tuple0cat1cat2cat3)>::value, 6);
+  
+  // make_tuple.
+  // The tuple types should use values for the second and fourth parameters.
+  double tmp = 20;
+  auto tuple_make = tuple_impl::make_tuple(int(10), tmp, float(20.0f), tuple0);
+  VERIFY( (std::is_same<decltype(tuple_make), tuple<int, double, float, tuple<> > >::value) );
+  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple_make), tmp);
+  
+  // forward_as_tuple.
+  // The tuple types should use references for the second and fourth parameters.
+  auto tuple_forward = tuple_impl::forward_as_tuple(int(10), tmp, float(20.0f), tuple0);
+  VERIFY( (std::is_same<decltype(tuple_forward), tuple<int, double&, float, tuple<>& > >::value) );
+  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple_forward), tmp);
+  
+  // tie.
+  auto tuple_tie = tuple_impl::tie(tuple0, tuple1, tuple2, tuple3);
+  VERIFY( (std::is_same<decltype(tuple_tie), 
+                        tuple<decltype(tuple0)&,
+                              decltype(tuple1)&,
+                              decltype(tuple2)&,
+                              decltype(tuple3)&> >::value) );
+  VERIFY_IS_EQUAL( (tuple_impl::get<1>(tuple_impl::get<2>(tuple_tie))), 5.0 );
+  // Modify value and ensure tuple2 is updated.
+  tuple_impl::get<1>(tuple_impl::get<2>(tuple_tie)) = 10.0;
+  VERIFY_IS_EQUAL( (tuple_impl::get<1>(tuple2)), 10.0 );
+  
+  // Assignment.
+  int x = -1;
+  float y = -1;
+  double z = -1;
+  tuple_impl::tie(x, y, z) = tuple3;
+  VERIFY_IS_EQUAL(x, tuple_impl::get<0>(tuple3));
+  VERIFY_IS_EQUAL(y, tuple_impl::get<1>(tuple3));
+  VERIFY_IS_EQUAL(z, tuple_impl::get<2>(tuple3));
+  tuple<int, float, double> tuple3c(-2, -2, -2);
+  tuple3c = std::move(tuple3b);
+  VERIFY_IS_EQUAL(tuple_impl::get<0>(tuple3c), tuple_impl::get<0>(tuple3));
+  VERIFY_IS_EQUAL(tuple_impl::get<1>(tuple3c), tuple_impl::get<1>(tuple3));
+  VERIFY_IS_EQUAL(tuple_impl::get<2>(tuple3c), tuple_impl::get<2>(tuple3));
+}
+
+// Checks that a tuple can hold Eigen matrices and that tie() references them.
+void eigen_tuple_test() {
+  // One fixed-size and one dynamically sized matrix, both filled randomly.
+  tuple<Eigen::Matrix3d, Eigen::MatrixXd> matrices;
+  tuple_impl::get<0>(matrices).setRandom();
+  tuple_impl::get<1>(matrices).setRandom(10, 10);
+
+  // Writing through the tied tuple must be visible in `matrices`.
+  auto tied = tuple_impl::tie(tuple_impl::get<0>(matrices), tuple_impl::get<1>(matrices));
+  tuple_impl::get<1>(tied).setIdentity();
+  VERIFY(tuple_impl::get<1>(matrices).isIdentity());
+}
+
+EIGEN_DECLARE_TEST(tuple)  // Test entry point: runs both tuple sub-tests in order.
+{
+  CALL_SUBTEST(basic_tuple_test());
+  CALL_SUBTEST(eigen_tuple_test());
+}