Rasmus Munk Larsen 2016-02-04 14:32:19 -08:00
commit 093f2b3c01
25 changed files with 607 additions and 165 deletions

View File

@ -76,32 +76,6 @@ public:
#endif #endif
}; };
// template<typename Lhs, typename Rhs> struct product_tag
// {
// private:
//
// typedef typename remove_all<Lhs>::type _Lhs;
// typedef typename remove_all<Rhs>::type _Rhs;
// enum {
// Rows = _Lhs::RowsAtCompileTime,
// Cols = _Rhs::ColsAtCompileTime,
// Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, _Rhs::RowsAtCompileTime)
// };
//
// enum {
// rows_select = Rows==1 ? int(Rows) : int(Large),
// cols_select = Cols==1 ? int(Cols) : int(Large),
// depth_select = Depth==1 ? int(Depth) : int(Large)
// };
// typedef product_type_selector<rows_select, cols_select, depth_select> selector;
//
// public:
// enum {
// ret = selector::ret
// };
//
// };
/* The following allows to select the kind of product at compile time /* The following allows to select the kind of product at compile time
* based on the three dimensions of the product. * based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
@ -125,8 +99,8 @@ template<> struct product_type_selector<Small,Small,Large> { enum
template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Small,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Large,Large> { enum { ret = GemmProduct }; };
template<> struct product_type_selector<Large,Small,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Small,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Small,Large,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Small,Large,Small> { enum { ret = CoeffBasedProductMode }; };
template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; }; template<> struct product_type_selector<Large,Large,Small> { enum { ret = GemmProduct }; };
} // end namespace internal } // end namespace internal
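For context, a minimal sketch (hypothetical sizes) of the shape covered by the two retuned specializations: the result has a dynamic (Large) row count while the column count and the depth are small and fixed, so the coefficient-based path is now preferred over a full GEMM call.

#include <Eigen/Dense>

int main() {
  // rows: Dynamic -> Large, result cols: 2 -> Small, depth: 2 -> Small,
  // i.e. the <Large,Small,Small> case remapped above (sizes are hypothetical).
  Eigen::Matrix<double, Eigen::Dynamic, 2> lhs =
      Eigen::Matrix<double, Eigen::Dynamic, 2>::Random(1000, 2);
  Eigen::Matrix2d rhs = Eigen::Matrix2d::Random();
  Eigen::Matrix<double, Eigen::Dynamic, 2> res = lhs * rhs;  // coefficient-based product
  return res.rows() == 1000 ? 0 : 1;
}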

View File

@ -134,6 +134,11 @@ pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
return static_cast<TgtPacket>(a); return static_cast<TgtPacket>(a);
} }
template <typename SrcPacket, typename TgtPacket>
EIGEN_DEVICE_FUNC inline TgtPacket
pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
return static_cast<TgtPacket>(a);
}
/** \internal \returns a + b (coeff-wise) */ /** \internal \returns a + b (coeff-wise) */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet template<typename Packet> EIGEN_DEVICE_FUNC inline Packet

View File

@ -134,7 +134,24 @@ struct lgamma_impl<double> {
* Implementation of digamma (psi) * * Implementation of digamma (psi) *
****************************************************************************/ ****************************************************************************/
#ifdef EIGEN_HAS_C99_MATH template <typename Scalar>
struct digamma_retval {
typedef Scalar type;
};
#ifndef EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_impl {
EIGEN_DEVICE_FUNC
static Scalar run(Scalar x) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
}
};
#else
/* /*
* *
@ -202,14 +219,6 @@ struct digamma_impl_maybe_poly<double> {
} }
}; };
#endif // EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_retval {
typedef Scalar type;
};
#ifdef EIGEN_HAS_C99_MATH
template <typename Scalar> template <typename Scalar>
struct digamma_impl { struct digamma_impl {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
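A minimal usage sketch, assuming EIGEN_HAS_C99_MATH and the array-level digamma() wrapper available in this development branch; with C99 math disabled and an unsupported scalar type, the new fallback above fails to compile with THIS_TYPE_IS_NOT_SUPPORTED.

#include <iostream>
#include <Eigen/Core>

int main() {
  Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(4, 1.0, 4.0);
  // Dispatches to internal::digamma_impl<double> via digamma_retval.
  std::cout << x.digamma() << std::endl;
  return 0;
}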

View File

@ -178,7 +178,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// We also include a register-level block of the result (mx x nr). // We also include a register-level block of the result (mx x nr).
// (In an ideal world only the lhs panel would stay in L1) // (In an ideal world only the lhs panel would stay in L1)
// Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of: // Moreover, kc has to be a multiple of 8 to be compatible with loop peeling, leading to a maximum blocking size of:
const Index max_kc = ((l1-k_sub)/k_div) & (~(k_peeling-1)); const Index max_kc = std::max<Index>(((l1-k_sub)/k_div) & (~(k_peeling-1)),1);
const Index old_k = k; const Index old_k = k;
if(k>max_kc) if(k>max_kc)
{ {
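A small sketch of the failure mode the added std::max guards against, with hypothetical cache and peeling parameters: when the L1 budget left after the fixed subtraction is tiny, the masked division rounds down to zero, and clamping to 1 keeps the blocking size valid.

#include <algorithm>
#include <cstddef>

int main() {
  const std::ptrdiff_t l1 = 1024, k_sub = 900, k_div = 16, k_peeling = 8;  // hypothetical values
  std::ptrdiff_t max_kc = ((l1 - k_sub) / k_div) & (~(k_peeling - 1));     // 7 & ~7 == 0
  max_kc = std::max<std::ptrdiff_t>(max_kc, 1);                            // the patched behaviour
  return static_cast<int>(max_kc);                                          // 1
}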

View File

@ -20,6 +20,7 @@
#pragma warning( push ) #pragma warning( push )
#endif #endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800) #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
#elif defined __INTEL_COMPILER #elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed) // 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
@ -32,6 +33,7 @@
#pragma warning push #pragma warning push
#endif #endif
#pragma warning disable 2196 279 1684 2259 #pragma warning disable 2196 279 1684 2259
#elif defined __clang__ #elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums // this is really a stupid warning as it warns on compile-time expressions involving enums
@ -39,6 +41,17 @@
#pragma clang diagnostic push #pragma clang diagnostic push
#endif #endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand" #pragma clang diagnostic ignored "-Wconstant-logical-operand"
#elif defined __NVCC__
// Disable the "statement is unreachable" message
#pragma diag_suppress code_is_unreachable
// Disable the "dynamic initialization in unreachable code" message
#pragma diag_suppress initialization_not_reachable
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are 4 of them)
#pragma diag_suppress 2651
#pragma diag_suppress 2653
#pragma diag_suppress 2668
#pragma diag_suppress 2670
#endif #endif
#endif // not EIGEN_WARNINGS_DISABLED #endif // not EIGEN_WARNINGS_DISABLED

View File

@ -94,10 +94,6 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename Decomposition, typename Rhstype> class Solve; template<typename Decomposition, typename Rhstype> class Solve;
template<typename XprType> class Inverse; template<typename XprType> class Inverse;
namespace internal {
template<typename Lhs, typename Rhs> struct product_tag;
}
template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product; template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
template<typename Derived> class DiagonalBase; template<typename Derived> class DiagonalBase;

View File

@ -8,6 +8,14 @@
#pragma warning pop #pragma warning pop
#elif defined __clang__ #elif defined __clang__
#pragma clang diagnostic pop #pragma clang diagnostic pop
#elif defined __NVCC__
// Don't reenable the diagnostic messages, as it turns out these messages need
// to be disabled at the point of the template instantiation (i.e. the user code)
// otherwise they'll be triggered by nvcc.
// #pragma diag_default code_is_unreachable
// #pragma diag_default initialization_not_reachable
// #pragma diag_default 2651
// #pragma diag_default 2653
#endif #endif
#endif #endif

View File

@ -73,8 +73,15 @@ public:
Index m_outerStart; Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize; const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
public: protected:
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) // Disable assignment with clear error message.
// Note that simply removing operator= yields compilation errors with ICC+MSVC
template<typename T>
BlockImpl& operator=(const T&)
{
EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
return *this;
}
}; };
@ -424,8 +431,6 @@ public:
friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >; friend struct internal::unary_evaluator<Block<XprType,BlockRows,BlockCols,InnerPanel>, internal::IteratorBased, Scalar >;
Index nonZeros() const { return Dynamic; } Index nonZeros() const { return Dynamic; }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
typename internal::ref_selector<XprType>::non_const_type m_matrix; typename internal::ref_selector<XprType>::non_const_type m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow; const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
@ -433,6 +438,16 @@ public:
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows; const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols; const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_blockCols;
protected:
// Disable assignment with clear error message.
// Note that simply removing operator= yields compilation errors with ICC+MSVC
template<typename T>
BlockImpl& operator=(const T&)
{
EIGEN_STATIC_ASSERT(sizeof(T)==0, THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY);
return *this;
}
}; };
namespace internal { namespace internal {
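A hedged sketch of what the disabled operator= means in practice: generic blocks of a SparseMatrix remain readable, but assigning to them now fails at compile time with THIS_SPARSE_BLOCK_SUBEXPRESSION_IS_READ_ONLY, while contiguous inner-vector ranges stay writable.

#include <Eigen/SparseCore>

void sparse_block_write(Eigen::SparseMatrix<double>& A, const Eigen::SparseMatrix<double>& B) {
  A.middleCols(0, 2) = B.middleCols(0, 2);       // OK: contiguous columns of a column-major matrix
  // A.block(0, 0, 2, 2) = B.block(0, 0, 2, 2);  // would now trigger the static assertion above
}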

View File

@ -13,32 +13,24 @@
#include "details.h" #include "details.h"
// Define the explicit instantiation (e.g. necessary for the Intel compiler)
#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
#define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...) template class std::deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
#else
#define EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(...)
#endif
/** /**
* This section contains a convenience MACRO which allows an easy specialization of * This section contains a convenience MACRO which allows an easy specialization of
* std::deque such that for data types with alignment issues the correct allocator * std::deque such that for data types with alignment issues the correct allocator
* is used automatically. * is used automatically.
*/ */
#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) \ #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) \
EIGEN_EXPLICIT_STL_DEQUE_INSTANTIATION(__VA_ARGS__) \
namespace std \ namespace std \
{ \ { \
template<typename _Ay> \ template<> \
class deque<__VA_ARGS__, _Ay> \ class deque<__VA_ARGS__, std::allocator<__VA_ARGS__> > \
: public deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \ : public deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \
{ \ { \
typedef deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > deque_base; \ typedef deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > deque_base; \
public: \ public: \
typedef __VA_ARGS__ value_type; \ typedef __VA_ARGS__ value_type; \
typedef typename deque_base::allocator_type allocator_type; \ typedef deque_base::allocator_type allocator_type; \
typedef typename deque_base::size_type size_type; \ typedef deque_base::size_type size_type; \
typedef typename deque_base::iterator iterator; \ typedef deque_base::iterator iterator; \
explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \ explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \
template<typename InputIterator> \ template<typename InputIterator> \
deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : deque_base(first, last, a) {} \ deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : deque_base(first, last, a) {} \
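A usage sketch of the macro after this change (the new stddeque_overload test added later in this commit exercises the same pattern): once the specialization is defined at namespace scope, a std::deque declared with the default allocator transparently inherits from the aligned-allocator version.

#include <deque>
#include <Eigen/StdDeque>
#include <Eigen/Dense>

EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Eigen::Vector4f)

int main() {
  std::deque<Eigen::Vector4f> d(3, Eigen::Vector4f::Zero());  // default allocator is redirected
  d.push_back(Eigen::Vector4f::Ones());
  return d.size() == 4 ? 0 : 1;
}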

View File

@ -12,32 +12,24 @@
#include "details.h" #include "details.h"
// Define the explicit instantiation (e.g. necessary for the Intel compiler)
#if EIGEN_COMP_GNUC || EIGEN_COMP_ICC
#define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...) template class std::list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> >;
#else
#define EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(...)
#endif
/** /**
* This section contains a convenience MACRO which allows an easy specialization of * This section contains a convenience MACRO which allows an easy specialization of
* std::list such that for data types with alignment issues the correct allocator * std::list such that for data types with alignment issues the correct allocator
* is used automatically. * is used automatically.
*/ */
#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) \ #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) \
EIGEN_EXPLICIT_STL_LIST_INSTANTIATION(__VA_ARGS__) \
namespace std \ namespace std \
{ \ { \
template<typename _Ay> \ template<> \
class list<__VA_ARGS__, _Ay> \ class list<__VA_ARGS__, std::allocator<__VA_ARGS__> > \
: public list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \ : public list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \
{ \ { \
typedef list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > list_base; \ typedef list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > list_base; \
public: \ public: \
typedef __VA_ARGS__ value_type; \ typedef __VA_ARGS__ value_type; \
typedef typename list_base::allocator_type allocator_type; \ typedef list_base::allocator_type allocator_type; \
typedef typename list_base::size_type size_type; \ typedef list_base::size_type size_type; \
typedef typename list_base::iterator iterator; \ typedef list_base::iterator iterator; \
explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \ explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \
template<typename InputIterator> \ template<typename InputIterator> \
list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : list_base(first, last, a) {} \ list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : list_base(first, last, a) {} \

View File

@ -21,7 +21,7 @@ i.e either row major or column major. The default is column major. Most arithmet
<td> Resize/Reserve</td> <td> Resize/Reserve</td>
<td> <td>
\code \code
sm1.resize(m,n); //Change sm1 to a m x n matrix. sm1.resize(m,n); // Change sm1 to a m x n matrix.
sm1.reserve(nnz); // Allocate room for nnz nonzeros elements. sm1.reserve(nnz); // Allocate room for nnz nonzeros elements.
\endcode \endcode
</td> </td>
@ -151,10 +151,10 @@ It is easy to perform arithmetic operations on sparse matrices provided that the
<td> Permutation </td> <td> Permutation </td>
<td> <td>
\code \code
perm.indices(); // Reference to the vector of indices perm.indices(); // Reference to the vector of indices
sm1.twistedBy(perm); // Permute rows and columns sm1.twistedBy(perm); // Permute rows and columns
sm2 = sm1 * perm; //Permute the columns sm2 = sm1 * perm; // Permute the columns
sm2 = perm * sm1; // Permute the rows sm2 = perm * sm1; // Permute the rows
\endcode \endcode
</td> </td>
<td> <td>
@ -181,9 +181,9 @@ sm2 = perm * sm1; // Permute the columns
\section sparseotherops Other supported operations \section sparseotherops Other supported operations
<table class="manual"> <table class="manual">
<tr><th>Operations</th> <th> Code </th> <th> Notes</th> </tr> <tr><th style="min-width:initial"> Code </th> <th> Notes</th> </tr>
<tr><td colspan="2">Sub-matrices</td></tr>
<tr> <tr>
<td>Sub-matrices</td>
<td> <td>
\code \code
sm1.block(startRow, startCol, rows, cols); sm1.block(startRow, startCol, rows, cols);
@ -193,25 +193,31 @@ sm2 = perm * sm1; // Permute the columns
sm1.bottomLeftCorner( rows, cols); sm1.bottomLeftCorner( rows, cols);
sm1.bottomRightCorner( rows, cols); sm1.bottomRightCorner( rows, cols);
\endcode \endcode
</td> <td> </td> </td><td>
Contrary to dense matrices, here <strong>all these methods are read-only</strong>.\n
See \ref TutorialSparse_SubMatrices and below for read-write sub-matrices.
</td>
</tr> </tr>
<tr> <tr class="alt"><td colspan="2"> Range </td></tr>
<td> Range </td> <tr class="alt">
<td> <td>
\code \code
sm1.innerVector(outer); sm1.innerVector(outer); // RW
sm1.innerVectors(start, size); sm1.innerVectors(start, size); // RW
sm1.leftCols(size); sm1.leftCols(size); // RW
sm2.rightCols(size); sm2.rightCols(size); // RO because sm2 is row-major
sm1.middleRows(start, numRows); sm1.middleRows(start, numRows); // RO because sm1 is column-major
sm1.middleCols(start, numCols); sm1.middleCols(start, numCols); // RW
sm1.col(j); sm1.col(j); // RW
\endcode \endcode
</td> </td>
<td>A inner vector is either a row (for row-major) or a column (for column-major). As stated earlier, the evaluation can be done in a matrix with different storage order </td> <td>
An inner vector is either a row (for row-major) or a column (for column-major).\n
As stated earlier, for a read-write sub-matrix (RW), the evaluation can be done in a matrix with different storage order.
</td>
</tr> </tr>
<tr><td colspan="2"> Triangular and selfadjoint views</td></tr>
<tr> <tr>
<td> Triangular and selfadjoint views</td>
<td> <td>
\code \code
sm2 = sm1.triangularView<Lower>(); sm2 = sm1.triangularView<Lower>();
@ -222,26 +228,30 @@ sm2 = perm * sm1; // Permute the columns
\code \code
\endcode </td> \endcode </td>
</tr> </tr>
<tr> <tr class="alt"><td colspan="2">Triangular solve </td></tr>
<td>Triangular solve </td> <tr class="alt">
<td> <td>
\code \code
dv2 = sm1.triangularView<Upper>().solve(dv1); dv2 = sm1.triangularView<Upper>().solve(dv1);
dv2 = sm1.topLeftCorner(size, size).triangularView<Lower>().solve(dv1); dv2 = sm1.topLeftCorner(size, size)
.triangularView<Lower>().solve(dv1);
\endcode \endcode
</td> </td>
<td> For general sparse solve, use any suitable module described at \ref TopicSparseSystems </td> <td> For general sparse solve, use any suitable module described at \ref TopicSparseSystems </td>
</tr> </tr>
<tr><td colspan="2"> Low-level API</td></tr>
<tr> <tr>
<td> Low-level API</td>
<td> <td>
\code \code
sm1.valuePtr(); // Pointer to the values sm1.valuePtr(); // Pointer to the values
sm1.innerIndexPtr(); // Pointer to the indices. sm1.innerIndexPtr(); // Pointer to the indices.
sm1.outerIndexPtr(); //Pointer to the beginning of each inner vector sm1.outerIndexPtr(); // Pointer to the beginning of each inner vector
\endcode \endcode
</td> </td>
<td> If the matrix is not in compressed form, makeCompressed() should be called before. Note that these functions are mostly provided for interoperability purposes with external libraries. A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section</td> <td>
If the matrix is not in compressed form, makeCompressed() should be called before.\n
Note that these functions are mostly provided for interoperability purposes with external libraries.\n
The values of the matrix are better accessed through the InnerIterator class, as described in \link TutorialSparse the Tutorial Sparse \endlink section</td>
</tr> </tr>
</table> </table>
*/ */
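A minimal sketch of the low-level access pattern described in the table above, assuming the default int StorageIndex.

#include <Eigen/SparseCore>

void raw_access(Eigen::SparseMatrix<double>& sm1) {
  sm1.makeCompressed();                        // required before using the raw arrays
  const double* values = sm1.valuePtr();       // nonzero values
  const int*    inner  = sm1.innerIndexPtr();  // inner (row) indices of the nonzeros
  const int*    outer  = sm1.outerIndexPtr();  // offset of each inner vector's first entry
  (void)values; (void)inner; (void)outer;
}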

View File

@ -241,11 +241,11 @@ In the following \em sm denotes a sparse matrix, \em sv a sparse vector, \em dm
sm1.real() sm1.imag() -sm1 0.5*sm1 sm1.real() sm1.imag() -sm1 0.5*sm1
sm1+sm2 sm1-sm2 sm1.cwiseProduct(sm2) sm1+sm2 sm1-sm2 sm1.cwiseProduct(sm2)
\endcode \endcode
However, a strong restriction is that the storage orders must match. For instance, in the following example: However, <strong>a strong restriction is that the storage orders must match</strong>. For instance, in the following example:
\code \code
sm4 = sm1 + sm2 + sm3; sm4 = sm1 + sm2 + sm3;
\endcode \endcode
sm1, sm2, and sm3 must all be row-major or all column major. sm1, sm2, and sm3 must all be row-major or all column-major.
On the other hand, there is no restriction on the target matrix sm4. On the other hand, there is no restriction on the target matrix sm4.
For instance, this means that for computing \f$ A^T + A \f$, the matrix \f$ A^T \f$ must be evaluated into a temporary matrix of compatible storage order: For instance, this means that for computing \f$ A^T + A \f$, the matrix \f$ A^T \f$ must be evaluated into a temporary matrix of compatible storage order:
\code \code
@ -311,6 +311,26 @@ sm2 = sm1.transpose() * P;
\endcode \endcode
\subsection TutorialSparse_SubMatrices Block operations
Regarding read-access, sparse matrices expose the same API as dense matrices for accessing sub-matrices such as blocks, columns, and rows. See \ref TutorialBlockOperations for a detailed introduction.
However, for performance reasons, writing to a sub-sparse-matrix is much more limited, and currently only contiguous sets of columns (resp. rows) of a column-major (resp. row-major) SparseMatrix are writable. Moreover, this information has to be known at compile-time, leaving out methods such as <tt>block(...)</tt> and <tt>corner*(...)</tt>. The available API for write-access to a SparseMatrix is summarized below:
\code
SparseMatrix<double,ColMajor> sm1;
sm1.col(j) = ...;
sm1.leftCols(ncols) = ...;
sm1.middleCols(j,ncols) = ...;
sm1.rightCols(ncols) = ...;
SparseMatrix<double,RowMajor> sm2;
sm2.row(i) = ...;
sm2.topRows(nrows) = ...;
sm2.middleRows(i,nrows) = ...;
sm2.bottomRows(nrows) = ...;
\endcode
In addition, sparse matrices expose the SparseMatrixBase::innerVector() and SparseMatrixBase::innerVectors() methods, which are aliases to the col/middleCols methods for a column-major storage, and to the row/middleRows methods for a row-major storage.
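A short sketch of the aliasing described above (hypothetical sizes; the two source matrices are assumed to have compatible dimensions): the same innerVector/innerVectors call is writable for both storage orders.

#include <Eigen/SparseCore>

void inner_vector_aliases(Eigen::SparseMatrix<double, Eigen::ColMajor>& smc,
                          const Eigen::SparseMatrix<double, Eigen::ColMajor>& other_c,
                          Eigen::SparseMatrix<double, Eigen::RowMajor>& smr,
                          const Eigen::SparseMatrix<double, Eigen::RowMajor>& other_r) {
  smc.innerVector(3)     = other_c.innerVector(3);      // alias for col(3): writable
  smr.innerVectors(2, 4) = other_r.innerVectors(2, 4);  // alias for middleRows(2, 4): writable
}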
\subsection TutorialSparse_TriangularSelfadjoint Triangular and selfadjoint views \subsection TutorialSparse_TriangularSelfadjoint Triangular and selfadjoint views
Just as with dense matrices, the triangularView() function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side: Just as with dense matrices, the triangularView() function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side:

View File

@ -226,7 +226,9 @@ ei_add_test(geo_homogeneous)
ei_add_test(stdvector) ei_add_test(stdvector)
ei_add_test(stdvector_overload) ei_add_test(stdvector_overload)
ei_add_test(stdlist) ei_add_test(stdlist)
ei_add_test(stdlist_overload)
ei_add_test(stddeque) ei_add_test(stddeque)
ei_add_test(stddeque_overload)
ei_add_test(sparse_basic) ei_add_test(sparse_basic)
ei_add_test(sparse_block) ei_add_test(sparse_block)
ei_add_test(sparse_vector) ei_add_test(sparse_vector)

159
test/stddeque_overload.cpp Normal file
View File

@ -0,0 +1,159 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/StdDeque>
#include <Eigen/Geometry>
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Vector4f)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix2f)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix4f)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix4d)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Affine3f)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Affine3d)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Quaternionf)
EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Quaterniond)
template<typename MatrixType>
void check_stddeque_matrix(const MatrixType& m)
{
typename MatrixType::Index rows = m.rows();
typename MatrixType::Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::deque<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
v = w;
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(w[i], v[i]);
}
v.resize(21);
v[20] = x;
VERIFY_IS_APPROX(v[20], x);
v.resize(22,y);
VERIFY_IS_APPROX(v[21], y);
v.push_back(x);
VERIFY_IS_APPROX(v[22], x);
VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType));
// do a lot of push_back such that the deque gets internally resized
// (with memory reallocation)
MatrixType* ref = &w[0];
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
v.push_back(w[i%w.size()]);
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(v[i]==w[(i-23)%w.size()]);
}
}
template<typename TransformType>
void check_stddeque_transform(const TransformType&)
{
typedef typename TransformType::MatrixType MatrixType;
TransformType x(MatrixType::Random()), y(MatrixType::Random());
std::deque<TransformType> v(10), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
v = w;
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(w[i], v[i]);
}
v.resize(21);
v[20] = x;
VERIFY_IS_APPROX(v[20], x);
v.resize(22,y);
VERIFY_IS_APPROX(v[21], y);
v.push_back(x);
VERIFY_IS_APPROX(v[22], x);
// do a lot of push_back such that the deque gets internally resized
// (with memory reallocation)
TransformType* ref = &w[0];
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
v.push_back(w[i%w.size()]);
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(v[i].matrix()==w[(i-23)%w.size()].matrix());
}
}
template<typename QuaternionType>
void check_stddeque_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::deque<QuaternionType> v(10), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
v = w;
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(w[i], v[i]);
}
v.resize(21);
v[20] = x;
VERIFY_IS_APPROX(v[20], x);
v.resize(22,y);
VERIFY_IS_APPROX(v[21], y);
v.push_back(x);
VERIFY_IS_APPROX(v[22], x);
// do a lot of push_back such that the deque gets internally resized
// (with memory reallocation)
QuaternionType* ref = &w[0];
for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i)
v.push_back(w[i%w.size()]);
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(v[i].coeffs()==w[(i-23)%w.size()].coeffs());
}
}
void test_stddeque_overload()
{
// some non vectorizable fixed sizes
CALL_SUBTEST_1(check_stddeque_matrix(Vector2f()));
CALL_SUBTEST_1(check_stddeque_matrix(Matrix3f()));
CALL_SUBTEST_2(check_stddeque_matrix(Matrix3d()));
// some vectorizable fixed sizes
CALL_SUBTEST_1(check_stddeque_matrix(Matrix2f()));
CALL_SUBTEST_1(check_stddeque_matrix(Vector4f()));
CALL_SUBTEST_1(check_stddeque_matrix(Matrix4f()));
CALL_SUBTEST_2(check_stddeque_matrix(Matrix4d()));
// some dynamic sizes
CALL_SUBTEST_3(check_stddeque_matrix(MatrixXd(1,1)));
CALL_SUBTEST_3(check_stddeque_matrix(VectorXd(20)));
CALL_SUBTEST_3(check_stddeque_matrix(RowVectorXf(20)));
CALL_SUBTEST_3(check_stddeque_matrix(MatrixXcf(10,10)));
// some Transform
CALL_SUBTEST_4(check_stddeque_transform(Affine2f())); // does not need the specialization (2+1)^2 = 9
CALL_SUBTEST_4(check_stddeque_transform(Affine3f()));
CALL_SUBTEST_4(check_stddeque_transform(Affine3d()));
// some Quaternion
CALL_SUBTEST_5(check_stddeque_quaternion(Quaternionf()));
CALL_SUBTEST_5(check_stddeque_quaternion(Quaterniond()));
}

192
test/stdlist_overload.cpp Normal file
View File

@ -0,0 +1,192 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/StdList>
#include <Eigen/Geometry>
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Vector4f)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix2f)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix4f)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix4d)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Affine3f)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Affine3d)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Quaternionf)
EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Quaterniond)
template <class Container, class Position>
typename Container::iterator get(Container & c, Position position)
{
typename Container::iterator it = c.begin();
std::advance(it, position);
return it;
}
template <class Container, class Position, class Value>
void set(Container & c, Position position, const Value & value)
{
typename Container::iterator it = c.begin();
std::advance(it, position);
*it = value;
}
template<typename MatrixType>
void check_stdlist_matrix(const MatrixType& m)
{
typename MatrixType::Index rows = m.rows();
typename MatrixType::Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::list<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
typename std::list<MatrixType>::iterator itv = get(v, 5);
typename std::list<MatrixType>::iterator itw = get(w, 6);
*itv = x;
*itw = *itv;
VERIFY_IS_APPROX(*itw, *itv);
v = w;
itv = v.begin();
itw = w.begin();
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(*itw, *itv);
++itv;
++itw;
}
v.resize(21);
set(v, 20, x);
VERIFY_IS_APPROX(*get(v, 20), x);
v.resize(22,y);
VERIFY_IS_APPROX(*get(v, 21), y);
v.push_back(x);
VERIFY_IS_APPROX(*get(v, 22), x);
// do a lot of push_back such that the list gets internally resized
// (with memory reallocation)
MatrixType* ref = &(*get(w, 0));
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
v.push_back(*get(w, i%w.size()));
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY((*get(v, i))==(*get(w, (i-23)%w.size())));
}
}
template<typename TransformType>
void check_stdlist_transform(const TransformType&)
{
typedef typename TransformType::MatrixType MatrixType;
TransformType x(MatrixType::Random()), y(MatrixType::Random());
std::list<TransformType> v(10), w(20, y);
typename std::list<TransformType>::iterator itv = get(v, 5);
typename std::list<TransformType>::iterator itw = get(w, 6);
*itv = x;
*itw = *itv;
VERIFY_IS_APPROX(*itw, *itv);
v = w;
itv = v.begin();
itw = w.begin();
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(*itw, *itv);
++itv;
++itw;
}
v.resize(21);
set(v, 20, x);
VERIFY_IS_APPROX(*get(v, 20), x);
v.resize(22,y);
VERIFY_IS_APPROX(*get(v, 21), y);
v.push_back(x);
VERIFY_IS_APPROX(*get(v, 22), x);
// do a lot of push_back such that the list gets internally resized
// (with memory reallocation)
TransformType* ref = &(*get(w, 0));
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
v.push_back(*get(w, i%w.size()));
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(get(v, i)->matrix()==get(w, (i-23)%w.size())->matrix());
}
}
template<typename QuaternionType>
void check_stdlist_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::list<QuaternionType> v(10), w(20, y);
typename std::list<QuaternionType>::iterator itv = get(v, 5);
typename std::list<QuaternionType>::iterator itw = get(w, 6);
*itv = x;
*itw = *itv;
VERIFY_IS_APPROX(*itw, *itv);
v = w;
itv = v.begin();
itw = w.begin();
for(int i = 0; i < 20; i++)
{
VERIFY_IS_APPROX(*itw, *itv);
++itv;
++itw;
}
v.resize(21);
set(v, 20, x);
VERIFY_IS_APPROX(*get(v, 20), x);
v.resize(22,y);
VERIFY_IS_APPROX(*get(v, 21), y);
v.push_back(x);
VERIFY_IS_APPROX(*get(v, 22), x);
// do a lot of push_back such that the list gets internally resized
// (with memory reallocation)
QuaternionType* ref = &(*get(w, 0));
for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i)
v.push_back(*get(w, i%w.size()));
for(unsigned int i=23; i<v.size(); ++i)
{
VERIFY(get(v, i)->coeffs()==get(w, (i-23)%w.size())->coeffs());
}
}
void test_stdlist_overload()
{
// some non vectorizable fixed sizes
CALL_SUBTEST_1(check_stdlist_matrix(Vector2f()));
CALL_SUBTEST_1(check_stdlist_matrix(Matrix3f()));
CALL_SUBTEST_2(check_stdlist_matrix(Matrix3d()));
// some vectorizable fixed sizes
CALL_SUBTEST_1(check_stdlist_matrix(Matrix2f()));
CALL_SUBTEST_1(check_stdlist_matrix(Vector4f()));
CALL_SUBTEST_1(check_stdlist_matrix(Matrix4f()));
CALL_SUBTEST_2(check_stdlist_matrix(Matrix4d()));
// some dynamic sizes
CALL_SUBTEST_3(check_stdlist_matrix(MatrixXd(1,1)));
CALL_SUBTEST_3(check_stdlist_matrix(VectorXd(20)));
CALL_SUBTEST_3(check_stdlist_matrix(RowVectorXf(20)));
CALL_SUBTEST_3(check_stdlist_matrix(MatrixXcf(10,10)));
// some Transform
CALL_SUBTEST_4(check_stdlist_transform(Affine2f())); // does not need the specialization (2+1)^2 = 9
CALL_SUBTEST_4(check_stdlist_transform(Affine3f()));
CALL_SUBTEST_4(check_stdlist_transform(Affine3d()));
// some Quaternion
CALL_SUBTEST_5(check_stdlist_quaternion(Quaternionf()));
CALL_SUBTEST_5(check_stdlist_quaternion(Quaterniond()));
}

View File

@ -165,10 +165,10 @@ template <typename T> class array<T, 0> {
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE array() { } EIGEN_STRONG_INLINE array() : dummy() { }
#ifdef EIGEN_HAS_VARIADIC_TEMPLATES #ifdef EIGEN_HAS_VARIADIC_TEMPLATES
EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) { EIGEN_DEVICE_FUNC array(std::initializer_list<T> l) : dummy() {
eigen_assert(l.size() == 0); eigen_assert(l.size() == 0);
} }
#endif #endif

View File

@ -344,7 +344,7 @@ class TensorContractionSubMapper {
enum { enum {
// We can use direct offsets iff the parent mapper supports them and we can compute the strides. // We can use direct offsets iff the parent mapper supports them and we can compute the strides.
// TODO: we should also enable direct offsets for the Rhs case. // TODO: we should also enable direct offsets for the Rhs case.
UseDirectOffsets = (side == Lhs) && inner_dim_contiguous && ParentMapper::DirectOffsets UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size<contract_t>::value > 0)
}; };
EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)

View File

@ -86,6 +86,26 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
const TensorEvaluator& m_impl; const TensorEvaluator& m_impl;
}; };
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
PacketConverter(const TensorEvaluator& impl)
: m_impl(impl) {}
template<int LoadMode, typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
SrcPacket src1 = m_impl.template packet<LoadMode>(index);
SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
return result;
}
private:
const TensorEvaluator& m_impl;
};
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
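A hedged usage sketch: a tensor cast whose source and target packet types have a 4:1 coefficient ratio on the target architecture (hypothetical here) is routed through the converter above, which gathers four consecutive source packets and hands them to the new four-argument pcast.

#include <unsupported/Eigen/CXX11/Tensor>

void tensor_cast_example() {
  Eigen::Tensor<double, 1> src(64);
  src.setRandom();
  // Whether the 4:1 path is taken depends on the SIMD packet sizes of the two scalar types.
  Eigen::Tensor<float, 1> dst = src.cast<float>();
  (void)dst;
}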

View File

@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface {
struct GpuDevice { struct GpuDevice {
// The StreamInterface is not owned: the caller is // The StreamInterface is not owned: the caller is
// responsible for its initialization and eventual destruction. // responsible for its initialization and eventual destruction.
explicit GpuDevice(const StreamInterface* stream) : stream_(stream) { explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
eigen_assert(stream);
}
explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
eigen_assert(stream); eigen_assert(stream);
} }
// TODO(bsteiner): This is an internal API, we should not expose it. // TODO(bsteiner): This is an internal API, we should not expose it.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const { EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
return stream_->stream(); return stream_->stream();
@ -246,6 +248,10 @@ struct GpuDevice {
#endif #endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
return max_blocks_;
}
// This function checks if the CUDA runtime recorded an error for the // This function checks if the CUDA runtime recorded an error for the
// underlying stream device. // underlying stream device.
inline bool ok() const { inline bool ok() const {
@ -259,7 +265,7 @@ struct GpuDevice {
private: private:
const StreamInterface* stream_; const StreamInterface* stream_;
int max_blocks_;
}; };
#ifndef __CUDA_ARCH__ #ifndef __CUDA_ARCH__
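A hedged sketch (requires nvcc and EIGEN_USE_GPU) of the new two-argument constructor: it caps the number of blocks the executors may launch, and maxBlocks() reports that cap.

#define EIGEN_USE_GPU
#include <unsupported/Eigen/CXX11/Tensor>

void capped_device_example() {
  Eigen::CudaStreamDevice stream;                     // wraps the default CUDA stream
  Eigen::GpuDevice device(&stream, /*num_blocks=*/1024);
  int cap = device.maxBlocks();                       // == 1024
  (void)cap;
}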

View File

@ -220,7 +220,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
if (needs_assign) if (needs_assign)
{ {
const int block_size = device.maxCudaThreadsPerBlock(); const int block_size = device.maxCudaThreadsPerBlock();
const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
const Index size = array_prod(evaluator.dimensions()); const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash if we're called with tensors of size 0. // Create at least one block to ensure we won't crash if we're called with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1); const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
@ -239,7 +239,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(c
if (needs_assign) if (needs_assign)
{ {
const int block_size = device.maxCudaThreadsPerBlock(); const int block_size = device.maxCudaThreadsPerBlock();
const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
const Index size = array_prod(evaluator.dimensions()); const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash if we're called with tensors of size 0. // Create at least one block to ensure we won't crash if we're called with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1); const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);

View File

@ -33,18 +33,19 @@ struct TensorUInt128
HIGH high; HIGH high;
LOW low; LOW low;
template<typename OTHER_HIGH, typename OTHER_LOW>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(int32_t x) : high(0), low(x) { TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) {
EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), "high too wide");
EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), "low too wide");
}
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
explicit TensorUInt128(const T& x) : high(0), low(x) {
eigen_assert(x >= 0); eigen_assert(x >= 0);
} }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(uint32_t x) : high(0), low(x) { }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(int64_t x) : high(0), low(x) {
eigen_assert(x >= 0);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(uint64_t x) : high(0), low(x) { }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { } TensorUInt128(uint64_t y, uint64_t x) : high(y), low(x) { }
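A minimal sketch of the reworked constructors (internal API, shown for illustration only): any non-negative scalar now goes through the single explicit template constructor, and conversions between TensorUInt128 instantiations are rejected by the static assertions when either part would narrow.

#include <cstdint>
#include <unsupported/Eigen/CXX11/Tensor>

Eigen::internal::TensorUInt128<uint64_t, uint64_t> make_value() {
  Eigen::internal::TensorUInt128<uint64_t, uint64_t> x(42);  // explicit ctor: high == 0, low == 42
  Eigen::internal::TensorUInt128<uint64_t, uint64_t> y(x);   // same-width conversion/copy is fine
  return y;
}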

View File

@ -158,7 +158,7 @@ if(CUDA_FOUND)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
endif() endif()
set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_30") set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_30 -Xcudafe \"--display_error_number\"")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")

View File

@ -22,16 +22,16 @@ using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair; typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout> template<int DataLayout>
static void test_cuda_contraction(int m_size, int k_size, int n_size) void test_cuda_contraction(int m_size, int k_size, int n_size)
{ {
std::cout << "Calling with (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
// with these dimensions, the output has 300 * 140 elements, which is // with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, which is the number of threads in blocks on // more than 30 * 1024, which is the number of threads in blocks on
// a 15 SM GK110 GPU // a 15 SM GK110 GPU
Tensor<float, 2, DataLayout> t_left(Eigen::array<int, 2>(m_size, k_size)); Tensor<float, 2, DataLayout> t_left(m_size, k_size);
Tensor<float, 2, DataLayout> t_right(Eigen::array<int, 2>(k_size, n_size)); Tensor<float, 2, DataLayout> t_right(k_size, n_size);
Tensor<float, 2, DataLayout> t_result(Eigen::array<int, 2>(m_size, n_size)); Tensor<float, 2, DataLayout> t_result(m_size, n_size);
Tensor<float, 2, DataLayout> t_result_gpu(Eigen::array<int, 2>(m_size, n_size)); Tensor<float, 2, DataLayout> t_result_gpu(m_size, n_size);
Eigen::array<DimPair, 1> dims(DimPair(1, 0)); Eigen::array<DimPair, 1> dims(DimPair(1, 0));
t_left.setRandom(); t_left.setRandom();
@ -84,41 +84,69 @@ static void test_cuda_contraction(int m_size, int k_size, int n_size)
cudaFree((void*)d_t_result); cudaFree((void*)d_t_result);
} }
template<int DataLayout>
void test_cuda_contraction_m() {
for (int k = 32; k < 256; k++) {
test_cuda_contraction<ColMajor>(k, 128, 128);
test_cuda_contraction<RowMajor>(k, 128, 128);
}
}
template<int DataLayout>
void test_cuda_contraction_k() {
for (int k = 32; k < 256; k++) {
test_cuda_contraction<ColMajor>(128, k, 128);
test_cuda_contraction<RowMajor>(128, k, 128);
}
}
template<int DataLayout>
void test_cuda_contraction_n() {
for (int k = 32; k < 256; k++) {
test_cuda_contraction<ColMajor>(128, 128, k);
test_cuda_contraction<RowMajor>(128, 128, k);
}
}
template<int DataLayout>
void test_cuda_contraction_sizes() {
int m_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257 , 511,
512, 513, 1023, 1024, 1025};
int n_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257, 511,
512, 513, 1023, 1024, 1025};
int k_sizes[] = { 31, 39, 63, 64, 65,
95, 96, 127, 129, 255,
257, 511, 512, 513, 1023,
1024, 1025};
for (int i = 0; i < 15; i++) {
for (int j = 0; j < 15; j++) {
for (int k = 0; k < 17; k++) {
test_cuda_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]);
}
}
}
}
void test_cxx11_tensor_cuda() void test_cxx11_tensor_cuda()
{ {
std::cout << "Calling contraction tests" << std::endl; CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128));
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, 128, 128)); CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, 128, 128));
for (int k = 32; k < 256; k++) {
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, k, 128));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, k, 128));
}
for (int k = 32; k < 256; k++) {
CALL_SUBTEST(test_cuda_contraction<ColMajor>(128, 128, k));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(128, 128, k));
}
for (int k = 32; k < 256; k++) {
CALL_SUBTEST(test_cuda_contraction<ColMajor>(k, 128, 128));
CALL_SUBTEST(test_cuda_contraction<RowMajor>(k, 128, 128));
}
int m_sizes[] = {31, 39, 63, 64, 65, CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>());
127, 129, 255, 257, 511, CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>());
512, 513, 1023, 1024, 1025 };
int n_sizes[] = {31, 39, 63, 64, 65,
127, 129, 255, 257, 511,
512, 513, 1023, 1024, 1025 };
int k_sizes[] = { 31, 39, 63, 64, 65, CALL_SUBTEST_4(test_cuda_contraction_k<ColMajor>());
95, 96, 127, 129, 255, CALL_SUBTEST_5(test_cuda_contraction_k<RowMajor>());
257, 511, 512, 513, 1023,
1024, 1025};
for (int i = 0; i <15; i++) CALL_SUBTEST_6(test_cuda_contraction_n<ColMajor>());
for (int j = 0; j < 15; j++) CALL_SUBTEST_7(test_cuda_contraction_n<RowMajor>());
for (int k = 0; k < 17; k++) {
CALL_SUBTEST(test_cuda_contraction<ColMajor>(m_sizes[i], n_sizes[j], k_sizes[k])); CALL_SUBTEST_8(test_cuda_contraction_sizes<ColMajor>());
CALL_SUBTEST(test_cuda_contraction<RowMajor>(m_sizes[i], n_sizes[j], k_sizes[k])); CALL_SUBTEST_9(test_cuda_contraction_sizes<RowMajor>());
}
} }

View File

@ -109,19 +109,19 @@ struct GPUContext {
// The actual expression to evaluate // The actual expression to evaluate
template <typename Context> template <typename Context>
static void test_contextual_eval(Context* context) void test_contextual_eval(Context* context)
{ {
context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f); context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f);
} }
template <typename Context> template <typename Context>
static void test_forced_contextual_eval(Context* context) void test_forced_contextual_eval(Context* context)
{ {
context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f); context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f);
} }
template <typename Context> template <typename Context>
static void test_compound_assignment(Context* context) void test_compound_assignment(Context* context)
{ {
context->out().device(context->device()) = context->in1().constant(2.718f); context->out().device(context->device()) = context->in1().constant(2.718f);
context->out().device(context->device()) += context->in1() + context->in2() * 3.14f; context->out().device(context->device()) += context->in1() + context->in2() * 3.14f;
@ -129,7 +129,7 @@ static void test_compound_assignment(Context* context)
template <typename Context> template <typename Context>
static void test_contraction(Context* context) void test_contraction(Context* context)
{ {
Eigen::array<std::pair<int, int>, 2> dims; Eigen::array<std::pair<int, int>, 2> dims;
dims[0] = std::make_pair(1, 1); dims[0] = std::make_pair(1, 1);
@ -145,7 +145,7 @@ static void test_contraction(Context* context)
template <typename Context> template <typename Context>
static void test_1d_convolution(Context* context) void test_1d_convolution(Context* context)
{ {
Eigen::DSizes<int, 3> indices(0,0,0); Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(40,49,70); Eigen::DSizes<int, 3> sizes(40,49,70);
@ -155,7 +155,7 @@ static void test_1d_convolution(Context* context)
} }
template <typename Context> template <typename Context>
static void test_2d_convolution(Context* context) void test_2d_convolution(Context* context)
{ {
Eigen::DSizes<int, 3> indices(0,0,0); Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(40,49,69); Eigen::DSizes<int, 3> sizes(40,49,69);
@ -165,7 +165,7 @@ static void test_2d_convolution(Context* context)
} }
template <typename Context> template <typename Context>
static void test_3d_convolution(Context* context) void test_3d_convolution(Context* context)
{ {
Eigen::DSizes<int, 3> indices(0,0,0); Eigen::DSizes<int, 3> indices(0,0,0);
Eigen::DSizes<int, 3> sizes(39,49,69); Eigen::DSizes<int, 3> sizes(39,49,69);
@ -175,7 +175,7 @@ static void test_3d_convolution(Context* context)
} }
static void test_cpu() { void test_cpu() {
Eigen::Tensor<float, 3> in1(40,50,70); Eigen::Tensor<float, 3> in1(40,50,70);
Eigen::Tensor<float, 3> in2(40,50,70); Eigen::Tensor<float, 3> in2(40,50,70);
Eigen::Tensor<float, 3> out(40,50,70); Eigen::Tensor<float, 3> out(40,50,70);
@ -267,7 +267,7 @@ static void test_cpu() {
} }
} }
static void test_gpu() { void test_gpu() {
Eigen::Tensor<float, 3> in1(40,50,70); Eigen::Tensor<float, 3> in1(40,50,70);
Eigen::Tensor<float, 3> in2(40,50,70); Eigen::Tensor<float, 3> in2(40,50,70);
Eigen::Tensor<float, 3> out(40,50,70); Eigen::Tensor<float, 3> out(40,50,70);

View File

@ -127,7 +127,7 @@ void test_misc2() {
TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1)); TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
uint64_t actual = static_cast<uint64_t>(result); uint64_t actual = static_cast<uint64_t>(result);
VERIFY_EQUAL(actual, expected); VERIFY_IS_EQUAL(actual, expected);
} }
} }
} }