Merge with upstream eigen/default

This commit is contained in:
Eugene Zhulenev 2018-08-27 14:34:07 -07:00
commit c144bb355b
49 changed files with 668 additions and 556 deletions

View File

@ -84,7 +84,7 @@ cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> >
{ {
res.itype = CHOLMOD_INT; res.itype = CHOLMOD_INT;
} }
else if (internal::is_same<_StorageIndex,long>::value) else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value)
{ {
res.itype = CHOLMOD_LONG; res.itype = CHOLMOD_LONG;
} }
@ -168,11 +168,11 @@ namespace internal {
#define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ #define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \
template<typename _StorageIndex> inline ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ template<typename _StorageIndex> inline ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \
template<> inline ret cm_ ## name<long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } template<> inline ret cm_ ## name<SuiteSparse_long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); }
#define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ #define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \
template<typename _StorageIndex> inline ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ template<typename _StorageIndex> inline ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \
template<> inline ret cm_ ## name<long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } template<> inline ret cm_ ## name<SuiteSparse_long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); }
EIGEN_CHOLMOD_SPECIALIZE0(int, start) EIGEN_CHOLMOD_SPECIALIZE0(int, start)
EIGEN_CHOLMOD_SPECIALIZE0(int, finish) EIGEN_CHOLMOD_SPECIALIZE0(int, finish)
@ -184,15 +184,15 @@ EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A)
EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A)
template<typename _StorageIndex> inline cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } template<typename _StorageIndex> inline cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); }
template<> inline cholmod_dense* cm_solve<long> (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } template<> inline cholmod_dense* cm_solve<SuiteSparse_long> (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); }
template<typename _StorageIndex> inline cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); } template<typename _StorageIndex> inline cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); }
template<> inline cholmod_sparse* cm_spsolve<long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } template<> inline cholmod_sparse* cm_spsolve<SuiteSparse_long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); }
template<typename _StorageIndex> template<typename _StorageIndex>
inline int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } inline int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); }
template<> template<>
inline int cm_factorize_p<long> (cholmod_sparse* A, double beta[2], long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } inline int cm_factorize_p<SuiteSparse_long> (cholmod_sparse* A, double beta[2], SuiteSparse_long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); }
#undef EIGEN_CHOLMOD_SPECIALIZE0 #undef EIGEN_CHOLMOD_SPECIALIZE0
#undef EIGEN_CHOLMOD_SPECIALIZE1 #undef EIGEN_CHOLMOD_SPECIALIZE1

View File

@ -66,6 +66,7 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic,scalar_logistic_op,logistic function,\sa ArrayBase::logistic)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)

View File

@ -43,6 +43,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
enum { enum {
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime, RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime, ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
SizeAtCompileTime = Base::SizeAtCompileTime SizeAtCompileTime = Base::SizeAtCompileTime
}; };
@ -187,8 +188,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
{ {
#if EIGEN_MAX_ALIGN_BYTES>0 #if EIGEN_MAX_ALIGN_BYTES>0
// innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0) eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
|| (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned"); || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
#endif #endif
} }

View File

@ -634,13 +634,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
{ {
// _mm512_abs_ps intrinsic not found, so hack around it // _mm512_abs_ps intrinsic not found, so hack around it
return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff)); return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff)));
} }
template <> template <>
EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
// _mm512_abs_ps intrinsic not found, so hack around it // _mm512_abs_ps intrinsic not found, so hack around it
return (__m512d)_mm512_and_si512((__m512i)a, return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),
_mm512_set1_epi64(0x7fffffffffffffff)); _mm512_set1_epi64(0x7fffffffffffffff)));
} }
#ifdef EIGEN_VECTORIZE_AVX512DQ #ifdef EIGEN_VECTORIZE_AVX512DQ

View File

@ -823,6 +823,34 @@ struct functor_traits<scalar_sign_op<Scalar> >
}; };
}; };
/** \internal
* \brief Template functor to compute the logistic function of a scalar
* \sa class CwiseUnaryOp, ArrayBase::logistic()
*/
template <typename T>
struct scalar_logistic_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
const T one = T(1);
return one / (one + numext::exp(-x));
}
template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet packetOp(const Packet& x) const {
const Packet one = pset1<Packet>(T(1));
return pdiv(one, padd(one, pexp(pnegate(x))));
}
};
template <typename T>
struct functor_traits<scalar_logistic_op<T> > {
enum {
Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
packet_traits<T>::HasNegate && packet_traits<T>::HasExp
};
};
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen

View File

@ -405,7 +405,7 @@ template<typename T> struct plain_matrix_type_row_major
typedef Matrix<typename traits<T>::Scalar, typedef Matrix<typename traits<T>::Scalar,
Rows, Rows,
Cols, Cols,
(MaxCols==1&&MaxRows!=1) ? RowMajor : ColMajor, (MaxCols==1&&MaxRows!=1) ? ColMajor : RowMajor,
MaxRows, MaxRows,
MaxCols MaxCols
> type; > type;

View File

@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat)
template<typename Scalar, int Flags, typename Index> template<typename Scalar, int Flags, typename Index>
MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat) MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)
{ {
eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR)
|| (Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC); || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC));
Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow; Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow;

View File

@ -21,6 +21,7 @@ typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturn
typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType; typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType;
typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType; typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType;
typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType; typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType;
typedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType;
typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType; typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType; typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType; typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
@ -335,6 +336,15 @@ cosh() const
return CoshReturnType(derived()); return CoshReturnType(derived());
} }
/** \returns an expression of the coefficient-wise logistic of *this.
*/
EIGEN_DEVICE_FUNC
inline const LogisticReturnType
logistic() const
{
return LogisticReturnType(derived());
}
/** \returns an expression of the coefficient-wise inverse of *this. /** \returns an expression of the coefficient-wise inverse of *this.
* *
* Example: \include Cwise_inverse.cpp * Example: \include Cwise_inverse.cpp

View File

@ -1,4 +1,3 @@
typedef Matrix<double,4,Dynamic> Matrix4Xd;
Matrix4Xd M = Matrix4Xd::Random(4,5); Matrix4Xd M = Matrix4Xd::Random(4,5);
Projective3d P(Matrix4d::Random()); Projective3d P(Matrix4d::Random());
cout << "The matrix M is:" << endl << M << endl << endl; cout << "The matrix M is:" << endl << M << endl << endl;

View File

@ -1,4 +1,3 @@
typedef Matrix<double,3,Dynamic> Matrix3Xd;
Matrix3Xd M = Matrix3Xd::Random(3,5); Matrix3Xd M = Matrix3Xd::Random(3,5);
Projective3d P(Matrix4d::Random()); Projective3d P(Matrix4d::Random());
cout << "The matrix M is:" << endl << M << endl << endl; cout << "The matrix M is:" << endl << M << endl << endl;

View File

@ -231,6 +231,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.sinh(), sinh(m1));
VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
VERIFY_IS_APPROX(m1.logistic(), logistic(m1));
VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY_IS_APPROX(m1.arg(), arg(m1));
VERIFY_IS_APPROX(m1.round(), round(m1)); VERIFY_IS_APPROX(m1.round(), round(m1));
@ -266,6 +267,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1)));
VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1)));
VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
VERIFY_IS_APPROX(logistic(m1), (1.0/(1.0+exp(-m1))));
VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast<Scalar>())*std::acos(-1.0)); VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast<Scalar>())*std::acos(-1.0));
VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all()); VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all());
VERIFY((Eigen::isnan)((m1*0.0)/0.0).all()); VERIFY((Eigen::isnan)((m1*0.0)/0.0).all());
@ -345,6 +347,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.sinh(), sinh(m1));
VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1));
VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
VERIFY_IS_APPROX(m1.logistic(), logistic(m1));
VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY_IS_APPROX(m1.arg(), arg(m1));
VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all()); VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all());
VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all()); VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all());
@ -368,6 +371,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1)));
VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1)));
VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1))));
VERIFY_IS_APPROX(logistic(m1), (1.0/(1.0 + exp(-m1))));
for (Index i = 0; i < m.rows(); ++i) for (Index i = 0; i < m.rows(); ++i)
for (Index j = 0; j < m.cols(); ++j) for (Index j = 0; j < m.cols(); ++j)

View File

@ -290,6 +290,8 @@ template<typename PlainObjectType> void check_const_correctness(const PlainObjec
// Regression for bug 1573 // Regression for bug 1573
struct MovableClass { struct MovableClass {
// The following line is a workaround for gcc 4.7 and 4.8 (see bug 1573 comments).
static_assert(std::is_nothrow_move_constructible<Quaternionf>::value,"");
MovableClass() = default; MovableClass() = default;
MovableClass(const MovableClass&) = default; MovableClass(const MovableClass&) = default;
MovableClass(MovableClass&&) noexcept = default; MovableClass(MovableClass&&) noexcept = default;

View File

@ -102,7 +102,13 @@ EIGEN_DECLARE_TEST(meta)
} }
STATIC_CHECK(( !internal::is_convertible<MyInterface, MyImpl>::value )); STATIC_CHECK(( !internal::is_convertible<MyInterface, MyImpl>::value ));
#if (!EIGEN_COMP_GNUC_STRICT) || (EIGEN_GNUC_AT_LEAST(4,8))
// GCC prior to 4.8 fails to compile this test:
// error: cannot allocate an object of abstract type 'MyInterface'
// In other word, it does not obey SFINAE.
// Nevertheless, we don't really care about supporting abstract type as scalar type!
STATIC_CHECK(( !internal::is_convertible<MyImpl, MyInterface>::value )); STATIC_CHECK(( !internal::is_convertible<MyImpl, MyInterface>::value ));
#endif
STATIC_CHECK(( internal::is_convertible<MyImpl, const MyInterface&>::value )); STATIC_CHECK(( internal::is_convertible<MyImpl, const MyInterface&>::value ));
{ {
int i; int i;

View File

@ -44,17 +44,27 @@
#include <thread> #include <thread>
#include <functional> #include <functional>
#include <memory> #include <memory>
#include "src/util/CXX11Meta.h" #include "src/util/CXX11Meta.h"
#include "src/util/MaxSizeVector.h" #include "src/util/MaxSizeVector.h"
#include "src/ThreadPool/ThreadLocal.h" #include "src/ThreadPool/ThreadLocal.h"
#ifndef EIGEN_THREAD_LOCAL
// There are non-parenthesized calls to "max" in the <unordered_map> header,
// which trigger a check in test/main.h causing compilation to fail.
// We work around the check here by removing the check for max in
// the case where we have to emulate thread_local.
#ifdef max
#undef max
#endif
#include <unordered_map>
#endif
#include "src/ThreadPool/ThreadYield.h" #include "src/ThreadPool/ThreadYield.h"
#include "src/ThreadPool/ThreadCancel.h" #include "src/ThreadPool/ThreadCancel.h"
#include "src/ThreadPool/EventCount.h" #include "src/ThreadPool/EventCount.h"
#include "src/ThreadPool/RunQueue.h" #include "src/ThreadPool/RunQueue.h"
#include "src/ThreadPool/ThreadPoolInterface.h" #include "src/ThreadPool/ThreadPoolInterface.h"
#include "src/ThreadPool/ThreadEnvironment.h" #include "src/ThreadPool/ThreadEnvironment.h"
#include "src/ThreadPool/Barrier.h"
#include "src/ThreadPool/NonBlockingThreadPool.h" #include "src/ThreadPool/NonBlockingThreadPool.h"
#endif #endif
@ -62,4 +72,3 @@
#include <Eigen/src/Core/util/ReenableStupidWarnings.h> #include <Eigen/src/Core/util/ReenableStupidWarnings.h>
#endif // EIGEN_CXX11_THREADPOOL_MODULE #endif // EIGEN_CXX11_THREADPOOL_MODULE

View File

@ -189,7 +189,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) {
if (TensorEvaluator<LeftArgType, Device>::RawAccess && if (TensorEvaluator<LeftArgType, Device>::RawAccess &&
m_leftImpl.data() != nullptr) { m_leftImpl.data() != NULL) {
TensorBlock left_block(block->first_coeff_index(), block->block_sizes(), TensorBlock left_block(block->first_coeff_index(), block->block_sizes(),
block->tensor_strides(), block->tensor_strides(), block->tensor_strides(), block->tensor_strides(),
m_leftImpl.data() + block->first_coeff_index()); m_leftImpl.data() + block->first_coeff_index());

View File

@ -200,9 +200,9 @@ class TensorBase<Derived, ReadOnlyAccessors>
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived>
sigmoid() const { sigmoid() const {
return unaryExpr(internal::scalar_sigmoid_op<Scalar>()); return unaryExpr(internal::scalar_logistic_op<Scalar>());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC

View File

@ -62,7 +62,7 @@ struct cond<RowMajor> {
*/ */
enum TensorBlockShapeType { enum TensorBlockShapeType {
kUniformAllDims, kUniformAllDims,
kSkewedInnerDims, kSkewedInnerDims
}; };
struct TensorOpResourceRequirements { struct TensorOpResourceRequirements {
@ -73,7 +73,7 @@ struct TensorOpResourceRequirements {
// expression tree (like reductions) to communicate resources // expression tree (like reductions) to communicate resources
// requirements based on local state (like the total number of reductions // requirements based on local state (like the total number of reductions
// to be computed). // to be computed).
TensorOpResourceRequirements(internal::TensorBlockShapeType shape, TensorOpResourceRequirements(TensorBlockShapeType shape,
const Index size) const Index size)
: block_shape(shape), block_total_size(size) {} : block_shape(shape), block_total_size(size) {}
}; };
@ -90,9 +90,9 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
*block_shape = resources[0].block_shape; *block_shape = resources[0].block_shape;
*block_total_size = resources[0].block_total_size; *block_total_size = resources[0].block_total_size;
for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) { for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims && if (resources[i].block_shape == kSkewedInnerDims &&
*block_shape != TensorBlockShapeType::kSkewedInnerDims) { *block_shape != kSkewedInnerDims) {
*block_shape = TensorBlockShapeType::kSkewedInnerDims; *block_shape = kSkewedInnerDims;
} }
*block_total_size = *block_total_size =
numext::maxi(*block_total_size, resources[i].block_total_size); numext::maxi(*block_total_size, resources[i].block_total_size);
@ -152,11 +152,11 @@ struct TensorBlockCopyOp {
const Scalar* src_base = &src_data[src_index]; const Scalar* src_base = &src_data[src_index];
Scalar* dst_base = &dst_data[dst_index]; Scalar* dst_base = &dst_data[dst_index];
typedef const Eigen::Array<Scalar, Dynamic, 1> Src; typedef const Array<Scalar, Dynamic, 1> Src;
typedef Eigen::Array<Scalar, Dynamic, 1> Dst; typedef Array<Scalar, Dynamic, 1> Dst;
typedef Eigen::Map<Src, 0, InnerStride<>> SrcMap; typedef Map<Src, 0, InnerStride<> > SrcMap;
typedef Eigen::Map<Dst, 0, InnerStride<>> DstMap; typedef Map<Dst, 0, InnerStride<> > DstMap;
const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride)); const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride)); DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout,
bool BlockRead> bool BlockRead>
class TensorBlockIO { class TensorBlockIO {
public: public:
typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
TensorBlock; typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp;
typedef typename internal::TensorBlockCopyOp<Scalar, StorageIndex>
TensorBlockCopyOp;
protected: protected:
struct BlockIteratorState { struct BlockIteratorState {
@ -194,7 +192,7 @@ class TensorBlockIO {
}; };
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy( static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy(
const TensorBlock& block, StorageIndex first_coeff_index, const Block& block, StorageIndex first_coeff_index,
const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data, const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data,
Scalar* dst_data) { Scalar* dst_data) {
@ -214,11 +212,11 @@ class TensorBlockIO {
num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1); num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1);
const StorageIndex block_dim_for_tensor_stride1_dim = const StorageIndex block_dim_for_tensor_stride1_dim =
NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim]; NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim];
Index block_inner_dim_size = StorageIndex block_inner_dim_size =
NumDims == 0 ? 1 NumDims == 0 ? 1
: block.block_sizes()[block_dim_for_tensor_stride1_dim]; : block.block_sizes()[block_dim_for_tensor_stride1_dim];
for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) { for (Index i = num_size_one_inner_dims + 1; i < NumDims; ++i) {
const int dim = cond<Layout>()(i, NumDims - i - 1); const Index dim = cond<Layout>()(i, NumDims - i - 1);
const StorageIndex block_stride = const StorageIndex block_stride =
block.block_strides()[tensor_to_block_dim_map[dim]]; block.block_strides()[tensor_to_block_dim_map[dim]];
if (block_inner_dim_size == block_stride && if (block_inner_dim_size == block_stride &&
@ -260,8 +258,8 @@ class TensorBlockIO {
// Initialize block iterator state. Squeeze away any dimension of size 1. // Initialize block iterator state. Squeeze away any dimension of size 1.
int num_squeezed_dims = 0; int num_squeezed_dims = 0;
for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) { for (Index i = num_size_one_inner_dims; i < NumDims - 1; ++i) {
const int dim = cond<Layout>()(i + 1, NumDims - i - 2); const Index dim = cond<Layout>()(i + 1, NumDims - i - 2);
const StorageIndex size = block.block_sizes()[tensor_to_block_dim_map[dim]]; const StorageIndex size = block.block_sizes()[tensor_to_block_dim_map[dim]];
if (size == 1) { if (size == 1) {
continue; continue;
@ -290,8 +288,8 @@ class TensorBlockIO {
const StorageIndex block_total_size = const StorageIndex block_total_size =
NumDims == 0 ? 1 : block.block_sizes().TotalSize(); NumDims == 0 ? 1 : block.block_sizes().TotalSize();
for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) { for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) {
TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride, BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride,
dst_data, inputIndex, input_stride, src_data); dst_data, inputIndex, input_stride, src_data);
// Update index. // Update index.
for (int j = 0; j < num_squeezed_dims; ++j) { for (int j = 0; j < num_squeezed_dims; ++j) {
if (++block_iter_state[j].count < block_iter_state[j].size) { if (++block_iter_state[j].count < block_iter_state[j].size) {
@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims, class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
Layout, /*BlockRead=*/true> { Layout, /*BlockRead=*/true> {
public: public:
typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
TensorBlock; typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base;
typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true>
Base;
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
TensorBlock* block, const Scalar* src_data) { Block* block, const Scalar* src_data) {
array<StorageIndex, NumDims> tensor_to_block_dim_map; array<StorageIndex, NumDims> tensor_to_block_dim_map;
for (int i = 0; i < NumDims; ++i) { for (int i = 0; i < NumDims; ++i) {
tensor_to_block_dim_map[i] = i; tensor_to_block_dim_map[i] = i;
@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims,
} }
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
TensorBlock* block, StorageIndex first_coeff_index, Block* block, StorageIndex first_coeff_index,
const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) { const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) {
Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map, Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map,
@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims, class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
Layout, /*BlockRead=*/false> { Layout, /*BlockRead=*/false> {
public: public:
typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
TensorBlock; typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base;
typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false>
Base;
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
const TensorBlock& block, Scalar* dst_data) { const Block& block, Scalar* dst_data) {
array<StorageIndex, NumDims> tensor_to_block_dim_map; array<StorageIndex, NumDims> tensor_to_block_dim_map;
for (int i = 0; i < NumDims; ++i) { for (int i = 0; i < NumDims; ++i) {
tensor_to_block_dim_map[i] = i; tensor_to_block_dim_map[i] = i;
@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims,
} }
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
const TensorBlock& block, StorageIndex first_coeff_index, const Block& block, StorageIndex first_coeff_index,
const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_to_block_dim_map,
const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) { const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) {
Base::Copy(block, first_coeff_index, tensor_to_block_dim_map, Base::Copy(block, first_coeff_index, tensor_to_block_dim_map,
@ -542,13 +536,13 @@ struct TensorBlockCwiseBinaryOp {
const StorageIndex left_stride, const LeftScalar* left_data, const StorageIndex left_stride, const LeftScalar* left_data,
const StorageIndex right_index, const StorageIndex right_stride, const StorageIndex right_index, const StorageIndex right_stride,
const RightScalar* right_data) { const RightScalar* right_data) {
typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs; typedef const Array<LeftScalar, Dynamic, 1> Lhs;
typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs; typedef const Array<RightScalar, Dynamic, 1> Rhs;
typedef Eigen::Array<OutputScalar, Dynamic, 1> Out; typedef Array<OutputScalar, Dynamic, 1> Out;
typedef Eigen::Map<Lhs, 0, InnerStride<>> LhsMap; typedef Map<Lhs, 0, InnerStride<> > LhsMap;
typedef Eigen::Map<Rhs, 0, InnerStride<>> RhsMap; typedef Map<Rhs, 0, InnerStride<> > RhsMap;
typedef Eigen::Map<Out, 0, InnerStride<>> OutMap; typedef Map<Out, 0, InnerStride<> > OutMap;
const LeftScalar* lhs_base = &left_data[left_index]; const LeftScalar* lhs_base = &left_data[left_index];
const RightScalar* rhs_base = &right_data[right_index]; const RightScalar* rhs_base = &right_data[right_index];
@ -558,8 +552,7 @@ struct TensorBlockCwiseBinaryOp {
const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride)); const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride));
OutMap out(out_base, num_coeff, InnerStride<>(output_stride)); OutMap out(out_base, num_coeff, InnerStride<>(output_stride));
out = out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor);
} }
}; };
@ -575,8 +568,7 @@ struct TensorBlockCwiseBinaryOp {
template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar, template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar,
int NumDims, int Layout> int NumDims, int Layout>
struct TensorBlockCwiseBinaryIO { struct TensorBlockCwiseBinaryIO {
typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims, typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions;
Layout>::Dimensions Dimensions;
struct BlockIteratorState { struct BlockIteratorState {
StorageIndex output_stride, output_span; StorageIndex output_stride, output_span;
@ -642,7 +634,7 @@ struct TensorBlockCwiseBinaryIO {
if (size == 1) { if (size == 1) {
continue; continue;
} }
auto& state = block_iter_state[num_squeezed_dims]; BlockIteratorState& state = block_iter_state[num_squeezed_dims];
state.output_stride = block_strides[dim]; state.output_stride = block_strides[dim];
state.left_stride = left_strides[dim]; state.left_stride = left_strides[dim];
state.right_stride = right_strides[dim]; state.right_stride = right_strides[dim];
@ -664,7 +656,7 @@ struct TensorBlockCwiseBinaryIO {
right_stride, right_data); right_stride, right_data);
// Update index. // Update index.
for (int j = 0; j < num_squeezed_dims; ++j) { for (int j = 0; j < num_squeezed_dims; ++j) {
auto& state = block_iter_state[j]; BlockIteratorState& state = block_iter_state[j];
if (++state.count < state.size) { if (++state.count < state.size) {
output_index += state.output_stride; output_index += state.output_stride;
left_index += state.left_stride; left_index += state.left_stride;
@ -768,15 +760,14 @@ struct TensorBlockView {
template <typename Scalar, typename StorageIndex, int NumDims, int Layout> template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorBlockMapper { class TensorBlockMapper {
public: public:
typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
TensorBlock;
typedef DSizes<StorageIndex, NumDims> Dimensions; typedef DSizes<StorageIndex, NumDims> Dimensions;
TensorBlockMapper(const Dimensions& dims, TensorBlockMapper(const Dimensions& dims,
const TensorBlockShapeType block_shape, const TensorBlockShapeType block_shape,
Index min_target_size) Index min_target_size)
: m_dimensions(dims), : m_dimensions(dims),
m_block_dim_sizes(BlockDimensions(dims, block_shape, min_target_size)) { m_block_dim_sizes(BlockDimensions(dims, block_shape, internal::convert_index<StorageIndex>(min_target_size))) {
// Calculate block counts by dimension and total block count. // Calculate block counts by dimension and total block count.
DSizes<StorageIndex, NumDims> block_count; DSizes<StorageIndex, NumDims> block_count;
for (Index i = 0; i < block_count.rank(); ++i) { for (Index i = 0; i < block_count.rank(); ++i) {
@ -804,7 +795,7 @@ class TensorBlockMapper {
} }
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
GetBlockForIndex(StorageIndex block_index, Scalar* data) const { GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
StorageIndex first_coeff_index = 0; StorageIndex first_coeff_index = 0;
DSizes<StorageIndex, NumDims> coords; DSizes<StorageIndex, NumDims> coords;
@ -852,8 +843,7 @@ class TensorBlockMapper {
} }
} }
return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
data);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {
@ -868,8 +858,8 @@ class TensorBlockMapper {
private: private:
static Dimensions BlockDimensions(const Dimensions& tensor_dims, static Dimensions BlockDimensions(const Dimensions& tensor_dims,
const TensorBlockShapeType block_shape, const TensorBlockShapeType block_shape,
Index min_target_size) { StorageIndex min_target_size) {
min_target_size = numext::maxi<Index>(1, min_target_size); min_target_size = numext::maxi<StorageIndex>(1, min_target_size);
// If tensor fully fits into the target size, we'll treat it a single block. // If tensor fully fits into the target size, we'll treat it a single block.
Dimensions block_dim_sizes = tensor_dims; Dimensions block_dim_sizes = tensor_dims;
@ -883,12 +873,12 @@ class TensorBlockMapper {
block_dim_sizes[i] = 1; block_dim_sizes[i] = 1;
} }
} else if (block_dim_sizes.TotalSize() > min_target_size) { } else if (block_dim_sizes.TotalSize() > min_target_size) {
if (block_shape == TensorBlockShapeType::kUniformAllDims) { if (block_shape == kUniformAllDims) {
// Tensor will not fit within 'min_target_size' budget: calculate tensor // Tensor will not fit within 'min_target_size' budget: calculate tensor
// block dimension sizes based on "square" dimension size target. // block dimension sizes based on "square" dimension size target.
const Index dim_size_target = static_cast<Index>( const StorageIndex dim_size_target = internal::convert_index<StorageIndex>(
std::pow(static_cast<float>(min_target_size), std::pow(static_cast<float>(min_target_size),
1.0 / static_cast<float>(block_dim_sizes.rank()))); 1.0f / static_cast<float>(block_dim_sizes.rank())));
for (Index i = 0; i < block_dim_sizes.rank(); ++i) { for (Index i = 0; i < block_dim_sizes.rank(); ++i) {
// TODO(andydavis) Adjust the inner most 'block_dim_size' to make it // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it
// a multiple of the packet size. Note that reducing // a multiple of the packet size. Note that reducing
@ -913,7 +903,7 @@ class TensorBlockMapper {
total_size = total_size_other_dims * block_dim_sizes[dim]; total_size = total_size_other_dims * block_dim_sizes[dim];
} }
} }
} else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) { } else if (block_shape == kSkewedInnerDims) {
StorageIndex coeff_to_allocate = min_target_size; StorageIndex coeff_to_allocate = min_target_size;
for (int i = 0; i < NumDims; ++i) { for (int i = 0; i < NumDims; ++i) {
const int dim = cond<Layout>()(i, NumDims - i - 1); const int dim = cond<Layout>()(i, NumDims - i - 1);
@ -929,8 +919,9 @@ class TensorBlockMapper {
} }
} }
eigen_assert(block_dim_sizes.TotalSize() >= eigen_assert(
numext::mini<Index>(min_target_size, tensor_dims.TotalSize())); block_dim_sizes.TotalSize() >=
numext::mini<Index>(min_target_size, tensor_dims.TotalSize()));
return block_dim_sizes; return block_dim_sizes;
} }
@ -957,8 +948,7 @@ class TensorBlockMapper {
template <typename Scalar, typename StorageIndex, int NumDims, int Layout> template <typename Scalar, typename StorageIndex, int NumDims, int Layout>
class TensorSliceBlockMapper { class TensorSliceBlockMapper {
public: public:
typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block;
TensorBlock;
typedef DSizes<StorageIndex, NumDims> Dimensions; typedef DSizes<StorageIndex, NumDims> Dimensions;
TensorSliceBlockMapper(const Dimensions& tensor_dims, TensorSliceBlockMapper(const Dimensions& tensor_dims,
@ -974,7 +964,7 @@ class TensorSliceBlockMapper {
m_total_block_count(1) { m_total_block_count(1) {
// Calculate block counts by dimension and total block count. // Calculate block counts by dimension and total block count.
DSizes<StorageIndex, NumDims> block_count; DSizes<StorageIndex, NumDims> block_count;
for (size_t i = 0; i < block_count.rank(); ++i) { for (Index i = 0; i < block_count.rank(); ++i) {
block_count[i] = divup(m_tensor_slice_extents[i], m_block_dim_sizes[i]); block_count[i] = divup(m_tensor_slice_extents[i], m_block_dim_sizes[i]);
} }
m_total_block_count = array_prod(block_count); m_total_block_count = array_prod(block_count);
@ -999,7 +989,7 @@ class TensorSliceBlockMapper {
} }
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block
GetBlockForIndex(StorageIndex block_index, Scalar* data) const { GetBlockForIndex(StorageIndex block_index, Scalar* data) const {
StorageIndex first_coeff_index = 0; StorageIndex first_coeff_index = 0;
DSizes<StorageIndex, NumDims> coords; DSizes<StorageIndex, NumDims> coords;
@ -1056,8 +1046,7 @@ class TensorSliceBlockMapper {
} }
} }
return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, return Block(first_coeff_index, sizes, strides, m_tensor_strides, data);
data);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const {

View File

@ -105,7 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
bool isCopy= false, nByOne = false, oneByN = false; bool isCopy, nByOne, oneByN;
enum { enum {
IsAligned = true, IsAligned = true,
@ -134,9 +134,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
const Device& device) const Device& device)
: m_device(device), : isCopy(false), nByOne(false), oneByN(false),
m_broadcast(op.broadcast()), m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device)
m_impl(op.expression(), device) { {
// The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
// and store the result in a scalar. Instead one should reshape the scalar into a a N-D // and store the result in a scalar. Instead one should reshape the scalar into a a N-D
// tensor with N >= 1 of 1 element first and then broadcast. // tensor with N >= 1 of 1 element first and then broadcast.

View File

@ -152,13 +152,7 @@ struct TensorContractionParams {
// 1. Elementwise Relu transformation following Conv2D. // 1. Elementwise Relu transformation following Conv2D.
// 2. AddBias to the Conv2D output channels dimension. // 2. AddBias to the Conv2D output channels dimension.
// //
// See expected implementation in NoOpOutputKernel. // The NoOpOutputKernel implements an output kernel that does absolutely nothing.
struct OutputKernel {
template <typename Index, typename Scalar>
using OutputMapper = internal::blas_data_mapper<Scalar, Index, ColMajor>;
};
// Output kernel that does absolutely nothing.
struct NoOpOutputKernel { struct NoOpOutputKernel {
/** /**
* Tensor contraction evaluator calls this kernel after finishing each block * Tensor contraction evaluator calls this kernel after finishing each block
@ -177,7 +171,7 @@ struct NoOpOutputKernel {
*/ */
template <typename Index, typename Scalar> template <typename Index, typename Scalar>
EIGEN_ALWAYS_INLINE void operator()( EIGEN_ALWAYS_INLINE void operator()(
const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/, const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/,
const TensorContractionParams& /*params*/, Index /*i*/, const TensorContractionParams& /*params*/, Index /*i*/,
Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {} Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {}
}; };
@ -354,7 +348,7 @@ struct TensorContractionEvaluatorBase
// dimensions and right non-contracting dimensions. // dimensions and right non-contracting dimensions.
m_lhs_inner_dim_contiguous = true; m_lhs_inner_dim_contiguous = true;
int dim_idx = 0; int dim_idx = 0;
unsigned int nocontract_idx = 0; Index nocontract_idx = 0;
for (int i = 0; i < LDims; i++) { for (int i = 0; i < LDims; i++) {
// find if we are contracting on index i of left tensor // find if we are contracting on index i of left tensor
@ -667,7 +661,7 @@ struct TensorContractionEvaluatorBase
// call gebp (matrix kernel) // call gebp (matrix kernel)
// The parameters here are copied from Eigen's GEMM implementation // The parameters here are copied from Eigen's GEMM implementation
const auto output_mapper = output.getSubMapper(i2, j2); const OutputMapper output_mapper = output.getSubMapper(i2, j2);
gebp(output_mapper, blockA, blockB, actual_mc, actual_kc, actual_nc, gebp(output_mapper, blockA, blockB, actual_mc, actual_kc, actual_nc,
Scalar(1), -1, -1, 0, 0); Scalar(1), -1, -1, 0, 0);

View File

@ -88,6 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
enum { enum {
IsAligned = false, IsAligned = false,
@ -107,12 +108,12 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
if (data) { if (data) {
evalTo(data); evalTo(data);
return false; return false;
} else { } else {
m_result = static_cast<CoeffReturnType*>( m_result = static_cast<PointerT>(
m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar)));
evalTo(m_result); evalTo(m_result);
return true; return true;
@ -140,23 +141,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType data() const { return m_result; } EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
#endif #endif
protected: protected:
EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result( TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions);
data, m_dimensions);
m_op.func().eval(m_op.expression(), result, m_device); m_op.func().eval(m_op.expression(), result, m_device);
} }
Dimensions m_dimensions; Dimensions m_dimensions;
const ArgType m_op; const ArgType m_op;
const Device& m_device; const Device& m_device;
CoeffReturnType* m_result; PointerT m_result;
}; };
@ -251,6 +251,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = PacketType<CoeffReturnType, Device>::size; static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
typedef typename PointerType<CoeffReturnType, Device>::Type PointerT;
enum { enum {
IsAligned = false, IsAligned = false,
@ -270,12 +271,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) {
if (data) { if (data) {
evalTo(data); evalTo(data);
return false; return false;
} else { } else {
m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType)));
evalTo(m_result); evalTo(m_result);
return true; return true;
} }
@ -302,22 +303,22 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType,
return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
} }
EIGEN_DEVICE_FUNC typename internal::traits<XprType>::PointerType data() const { return m_result; } EIGEN_DEVICE_FUNC PointerT data() const { return m_result; }
#ifdef EIGEN_USE_SYCL #ifdef EIGEN_USE_SYCL
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; }
#endif #endif
protected: protected:
EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { EIGEN_DEVICE_FUNC void evalTo(PointerT data) {
TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions); TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions);
m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device);
} }
Dimensions m_dimensions; Dimensions m_dimensions;
const XprType m_op; const XprType m_op;
const Device& m_device; const Device& m_device;
CoeffReturnType* m_result; PointerT m_result;
}; };

View File

@ -12,56 +12,6 @@
namespace Eigen { namespace Eigen {
// Barrier is an object that allows one or more threads to wait until
// Notify has been called a specified number of times.
class Barrier {
public:
Barrier(unsigned int count) : state_(count << 1), notified_(false) {
eigen_assert(((count << 1) >> 1) == count);
}
~Barrier() {
eigen_assert((state_>>1) == 0);
}
void Notify() {
unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
if (v != 1) {
eigen_assert(((v + 2) & ~1) != 0);
return; // either count has not dropped to 0, or waiter is not waiting
}
std::unique_lock<std::mutex> l(mu_);
eigen_assert(!notified_);
notified_ = true;
cv_.notify_all();
}
void Wait() {
unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel);
if ((v >> 1) == 0) return;
std::unique_lock<std::mutex> l(mu_);
while (!notified_) {
cv_.wait(l);
}
}
private:
std::mutex mu_;
std::condition_variable cv_;
std::atomic<unsigned int> state_; // low bit is waiter flag
bool notified_;
};
// Notification is an object that allows a user to to wait for another
// thread to signal a notification that an event has occurred.
//
// Multiple threads can wait on the same Notification object,
// but only one caller must call Notify() on the object.
struct Notification : Barrier {
Notification() : Barrier(1) {};
};
// Runs an arbitrary function and then calls Notify() on the passed in // Runs an arbitrary function and then calls Notify() on the passed in
// Notification. // Notification.
template <typename Function, typename... Args> struct FunctionWrapperWithNotification template <typename Function, typename... Args> struct FunctionWrapperWithNotification
@ -102,7 +52,7 @@ class Allocator {
// Build a thread pool device on top the an existing pool of threads. // Build a thread pool device on top the an existing pool of threads.
struct ThreadPoolDevice { struct ThreadPoolDevice {
// The ownership of the thread pool remains with the caller. // The ownership of the thread pool remains with the caller.
ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr) ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = NULL)
: pool_(pool), num_threads_(num_cores), allocator_(allocator) { } : pool_(pool), num_threads_(num_cores), allocator_(allocator) { }
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
@ -282,7 +232,7 @@ struct ThreadPoolDevice {
// Convenience wrapper for parallelFor that does not align blocks. // Convenience wrapper for parallelFor that does not align blocks.
void parallelFor(Index n, const TensorOpCost& cost, void parallelFor(Index n, const TensorOpCost& cost,
std::function<void(Index, Index)> f) const { std::function<void(Index, Index)> f) const {
parallelFor(n, cost, nullptr, std::move(f)); parallelFor(n, cost, NULL, std::move(f));
} }
// Thread pool accessor. // Thread pool accessor.

View File

@ -32,12 +32,12 @@ namespace Eigen {
// Boilerplate code // Boilerplate code
namespace internal { namespace internal {
template<std::size_t n, typename Dimension> struct dget { template<std::ptrdiff_t n, typename Dimension> struct dget {
static const std::ptrdiff_t value = get<n, Dimension>::value; static const std::ptrdiff_t value = get<n, Dimension>::value;
}; };
template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
struct fixed_size_tensor_index_linearization_helper struct fixed_size_tensor_index_linearization_helper
{ {
template <typename Dimensions> EIGEN_DEVICE_FUNC template <typename Dimensions> EIGEN_DEVICE_FUNC
@ -50,7 +50,7 @@ struct fixed_size_tensor_index_linearization_helper
} }
}; };
template<typename Index, std::size_t NumIndices, bool RowMajor> template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
{ {
template <typename Dimensions> EIGEN_DEVICE_FUNC template <typename Dimensions> EIGEN_DEVICE_FUNC
@ -60,7 +60,7 @@ struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMaj
} }
}; };
template<typename Index, std::size_t n> template<typename Index, std::ptrdiff_t n>
struct fixed_size_tensor_index_extraction_helper struct fixed_size_tensor_index_extraction_helper
{ {
template <typename Dimensions> EIGEN_DEVICE_FUNC template <typename Dimensions> EIGEN_DEVICE_FUNC
@ -94,7 +94,7 @@ struct Sizes {
typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base; typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base;
const Base t = Base(); const Base t = Base();
static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); static const std::ptrdiff_t total_size = internal::arg_prod(Indices...);
static const size_t count = Base::count; static const ptrdiff_t count = Base::count;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const {
return Base::count; return Base::count;
@ -121,16 +121,16 @@ struct Sizes {
return *this; return *this;
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::ptrdiff_t index) const {
return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, t); return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, t);
} }
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, t); return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, t);
} }
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, t); return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, t);
} }
}; };
@ -144,25 +144,25 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indi
#else #else
template <std::size_t n> template <std::ptrdiff_t n>
struct non_zero_size { struct non_zero_size {
typedef internal::type2val<std::size_t, n> type; typedef internal::type2val<std::ptrdiff_t, n> type;
}; };
template <> template <>
struct non_zero_size<0> { struct non_zero_size<0> {
typedef internal::null_type type; typedef internal::null_type type;
}; };
template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes { template <std::ptrdiff_t V1=0, std::ptrdiff_t V2=0, std::ptrdiff_t V3=0, std::ptrdiff_t V4=0, std::ptrdiff_t V5=0> struct Sizes {
typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base; typedef typename internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base;
static const size_t count = Base::count; static const std::ptrdiff_t count = Base::count;
static const std::size_t total_size = internal::arg_prod<Base>::value; static const std::ptrdiff_t total_size = internal::arg_prod<Base>::value;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t rank() const {
return count; return count;
} }
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t TotalSize() {
return internal::arg_prod<Base>::value; return internal::arg_prod<Base>::value;
} }
@ -178,7 +178,7 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
#if EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_HAS_VARIADIC_TEMPLATES
template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { } template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { }
explicit Sizes(std::initializer_list<std::size_t>) { explicit Sizes(std::initializer_list<std::ptrdiff_t>) {
// todo: add assertion // todo: add assertion
} }
#else #else
@ -213,18 +213,18 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0
} }
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this)); return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this));
} }
template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const {
return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this)); return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this));
} }
}; };
namespace internal { namespace internal {
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) {
return Sizes<V1, V2, V3, V4, V5>::total_size; return Sizes<V1, V2, V3, V4, V5>::total_size;
} }
} }
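These hunks switch the fixed-size dimension helpers (dget, Sizes, fixed_size_tensor_index_linearization_helper) from std::size_t to std::ptrdiff_t so that compile-time dimension arithmetic uses the same signed index type as the rest of the tensor code. As a hedged, standalone sketch of the column-major linearization that IndexOfColMajor performs (the function and names below are illustrative, not Eigen internals):
#include <array>
#include <cstddef>
// Minimal sketch: linearize a multi-index over fixed dimensions, column-major,
// using std::ptrdiff_t throughout to match the dimension type used above.
template <std::size_t N>
std::ptrdiff_t linearize_col_major(const std::array<std::ptrdiff_t, N>& idx,
                                   const std::array<std::ptrdiff_t, N>& dims) {
  std::ptrdiff_t result = 0;
  std::ptrdiff_t stride = 1;
  for (std::size_t i = 0; i < N; ++i) {  // the first (innermost) dimension varies fastest
    result += idx[i] * stride;
    stride *= dims[i];
  }
  return result;
}
// e.g. idx = {1, 2} with dims = {3, 4} gives 1 + 2*3 = 7.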
@ -233,7 +233,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2,
// Boilerplate // Boilerplate
namespace internal { namespace internal {
template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
struct tensor_index_linearization_helper struct tensor_index_linearization_helper
{ {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -245,7 +245,7 @@ struct tensor_index_linearization_helper
} }
}; };
template<typename Index, std::size_t NumIndices, bool RowMajor> template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor>
{ {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -264,7 +264,7 @@ struct DSizes : array<DenseIndex, NumDims> {
typedef array<DenseIndex, NumDims> Base; typedef array<DenseIndex, NumDims> Base;
static const int count = NumDims; static const int count = NumDims;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const {
return NumDims; return NumDims;
} }
@ -298,7 +298,7 @@ struct DSizes : array<DenseIndex, NumDims> {
} }
} }
#else #else
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5>
EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) { EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) {
for (int i = 0 ; i < NumDims; ++i) { for (int i = 0 ; i < NumDims; ++i) {
(*this)[i] = a[i]; (*this)[i] = a[i];
@ -359,7 +359,7 @@ struct DSizes : array<DenseIndex, NumDims> {
// Boilerplate // Boilerplate
namespace internal { namespace internal {
template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor>
struct tensor_vsize_index_linearization_helper struct tensor_vsize_index_linearization_helper
{ {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -371,7 +371,7 @@ struct tensor_vsize_index_linearization_helper
} }
}; };
template<typename Index, std::size_t NumIndices, bool RowMajor> template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor>
struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
{ {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -386,10 +386,10 @@ struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor>
namespace internal { namespace internal {
template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > { template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > {
static const size_t value = NumDims; static const ptrdiff_t value = NumDims;
}; };
template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > { template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > {
static const size_t value = NumDims; static const ptrdiff_t value = NumDims;
}; };
#ifndef EIGEN_EMULATE_CXX11_META_H #ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > { template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > {
@ -399,33 +399,33 @@ template <typename std::ptrdiff_t... Indices> struct array_size<Sizes<Indices...
static const std::ptrdiff_t value = Sizes<Indices...>::count; static const std::ptrdiff_t value = Sizes<Indices...>::count;
}; };
template <std::ptrdiff_t n, typename std::ptrdiff_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) { template <std::ptrdiff_t n, typename std::ptrdiff_t... Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) {
return get<n, internal::numeric_list<std::size_t, Indices...> >::value; return get<n, internal::numeric_list<std::ptrdiff_t, Indices...> >::value;
} }
template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) {
eigen_assert(false && "should never be called"); eigen_assert(false && "should never be called");
return -1; return -1;
} }
#else #else
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > { template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > {
static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count;
}; };
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > { template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > {
static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count;
}; };
template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) { template <std::ptrdiff_t n, std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<V1,V2,V3,V4,V5>&) {
return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value; return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value;
} }
#endif #endif
template <typename Dims1, typename Dims2, size_t n, size_t m> template <typename Dims1, typename Dims2, ptrdiff_t n, ptrdiff_t m>
struct sizes_match_below_dim { struct sizes_match_below_dim {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) {
return false; return false;
} }
}; };
template <typename Dims1, typename Dims2, size_t n> template <typename Dims1, typename Dims2, ptrdiff_t n>
struct sizes_match_below_dim<Dims1, Dims2, n, n> { struct sizes_match_below_dim<Dims1, Dims2, n, n> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) {
return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) & return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) &

View File

@ -133,7 +133,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable,
if (needs_assign) { if (needs_assign) {
// Size tensor blocks to fit in cache (or requested target block size). // Size tensor blocks to fit in cache (or requested target block size).
Index block_total_size = numext::mini(cache_size, total_size); Index block_total_size = numext::mini(cache_size, total_size);
TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims; TensorBlockShapeType block_shape = kSkewedInnerDims;
// Query expression tree for desired block size/shape. // Query expression tree for desired block size/shape.
std::vector<TensorOpResourceRequirements> resources; std::vector<TensorOpResourceRequirements> resources;
evaluator.getResourceRequirements(&resources); evaluator.getResourceRequirements(&resources);
@ -229,12 +229,8 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> {
typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange; typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange;
Evaluator evaluator(expr, device); Evaluator evaluator(expr, device);
const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign) { if (needs_assign) {
const StorageIndex PacketSize =
Vectorizable
? unpacket_traits<typename Evaluator::PacketReturnType>::size
: 1;
const StorageIndex size = array_prod(evaluator.dimensions()); const StorageIndex size = array_prod(evaluator.dimensions());
device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), device.parallelFor(size, evaluator.costPerCoeff(Vectorizable),
EvalRange::alignBlockSize, EvalRange::alignBlockSize,
@ -259,12 +255,11 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
static EIGEN_STRONG_INLINE void run(const Expression& expr, static EIGEN_STRONG_INLINE void run(const Expression& expr,
const ThreadPoolDevice& device) { const ThreadPoolDevice& device) {
typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock;
typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper; typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper;
Evaluator evaluator(expr, device); Evaluator evaluator(expr, device);
StorageIndex total_size = array_prod(evaluator.dimensions()); Index total_size = array_prod(evaluator.dimensions());
StorageIndex cache_size = device.firstLevelCacheSize() / sizeof(Scalar); Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar);
if (total_size < cache_size) { if (total_size < cache_size) {
// TODO(andydavis) Reduce block management overhead for small tensors. // TODO(andydavis) Reduce block management overhead for small tensors.
internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable,
@ -273,9 +268,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr
return; return;
} }
const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
if (needs_assign) { if (needs_assign) {
TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims; TensorBlockShapeType block_shape = kSkewedInnerDims;
Index block_total_size = 0; Index block_total_size = 0;
// Query expression tree for desired block size/shape. // Query expression tree for desired block size/shape.
std::vector<internal::TensorOpResourceRequirements> resources; std::vector<internal::TensorOpResourceRequirements> resources;

View File

@ -24,6 +24,14 @@ template<typename T> struct MakePointer {
typedef T ScalarType; typedef T ScalarType;
}; };
// The PointerType class is a container of the device-specific pointer
// used for referring to a Pointer on the TensorEvaluator class. While the TensorExpression
// is a device-agnostic type and needs the MakePointer class for type conversion,
// the TensorEvaluator class can be specialized for a device, hence it is possible
// to construct different types of temporary storage memory in TensorEvaluator
// for different devices by specializing the following PointerType class.
template<typename T, typename Device> struct PointerType : MakePointer<T>{};
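The comment above describes PointerType as the per-device customization point for the pointer type used by TensorEvaluator. A hedged illustration of how a device could hook into it; MyDevice and the member typedefs are assumptions mirroring MakePointer, not part of this patch:
// Hypothetical sketch only (the specialization would live in namespace Eigen next to PointerType).
struct MyDevice;  // some device type, not defined by Eigen
template <typename T>
struct PointerType<T, MyDevice> {
  typedef T* Type;        // device-specific pointer handed to TensorEvaluator
  typedef T  ScalarType;
};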
namespace internal{ namespace internal{
template<typename A, typename B> struct Pointer_type_promotion { template<typename A, typename B> struct Pointer_type_promotion {
static const bool val=false; static const bool val=false;

View File

@ -54,36 +54,6 @@ struct functor_traits<scalar_fmod_op<Scalar> > {
PacketAccess = false }; PacketAccess = false };
}; };
/** \internal
* \brief Template functor to compute the sigmoid of a scalar
* \sa class CwiseUnaryOp, ArrayBase::sigmoid()
*/
template <typename T>
struct scalar_sigmoid_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
const T one = T(1);
return one / (one + numext::exp(-x));
}
template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet packetOp(const Packet& x) const {
const Packet one = pset1<Packet>(T(1));
return pdiv(one, padd(one, pexp(pnegate(x))));
}
};
template <typename T>
struct functor_traits<scalar_sigmoid_op<T> > {
enum {
Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
packet_traits<T>::HasNegate && packet_traits<T>::HasExp
};
};
template<typename Reducer, typename Device> template<typename Reducer, typename Device>
struct reducer_traits { struct reducer_traits {
enum { enum {

View File

@ -84,7 +84,7 @@ template<DenseIndex n> struct NumTraits<type2index<n> >
namespace internal { namespace internal {
template <typename T> template <typename T>
EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) { EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) {
val = new_val; val = internal::convert_index<T>(new_val);
} }
template <DenseIndex n> template <DenseIndex n>
EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) { EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) {

View File

@ -527,7 +527,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
{ {
for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) {
eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
} }
@ -985,7 +985,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
// Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped; DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
m_is_identity = true; m_is_identity = true;
for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) {
if (m_strides[i] != 1 || op.startIndices()[i] != 0 || if (m_strides[i] != 1 || op.startIndices()[i] != 0 ||
op.stopIndices()[i] != (m_impl.dimensions()[i] - 1)) { op.stopIndices()[i] != (m_impl.dimensions()[i] - 1)) {
m_is_identity = false; m_is_identity = false;

View File

@ -0,0 +1,64 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2018 Rasmus Munk Larsen <rmlarsen@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// Barrier is an object that allows one or more threads to wait until
// Notify has been called a specified number of times.
#ifndef EIGEN_CXX11_THREADPOOL_BARRIER_H
#define EIGEN_CXX11_THREADPOOL_BARRIER_H
namespace Eigen {
class Barrier {
public:
Barrier(unsigned int count) : state_(count << 1), notified_(false) {
eigen_assert(((count << 1) >> 1) == count);
}
~Barrier() { eigen_plain_assert((state_ >> 1) == 0); }
void Notify() {
unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
if (v != 1) {
eigen_assert(((v + 2) & ~1) != 0);
return; // either count has not dropped to 0, or waiter is not waiting
}
std::unique_lock<std::mutex> l(mu_);
eigen_assert(!notified_);
notified_ = true;
cv_.notify_all();
}
void Wait() {
unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel);
if ((v >> 1) == 0) return;
std::unique_lock<std::mutex> l(mu_);
while (!notified_) {
cv_.wait(l);
}
}
private:
std::mutex mu_;
std::condition_variable cv_;
std::atomic<unsigned int> state_; // low bit is waiter flag
bool notified_;
};
// Notification is an object that allows a user to wait for another
// thread to signal a notification that an event has occurred.
//
// Multiple threads can wait on the same Notification object,
// but only one caller must call Notify() on the object.
struct Notification : Barrier {
Notification() : Barrier(1){};
};
} // namespace Eigen
#endif // EIGEN_CXX11_THREADPOOL_BARRIER_H
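A short usage sketch for the new Barrier/Notification helpers; the std::thread plumbing below is only for illustration, and any thread mechanism that calls Notify() once per expected count works the same way:
#include <thread>
#include <vector>
void run_and_wait(unsigned int n) {
  Eigen::Barrier barrier(n);               // expect n Notify() calls
  std::vector<std::thread> workers;
  for (unsigned int i = 0; i < n; ++i) {
    workers.emplace_back([&barrier] {
      // ... do some work ...
      barrier.Notify();                    // each worker signals exactly once
    });
  }
  barrier.Wait();                          // returns once all n workers have notified
  for (std::thread& t : workers) t.join();
}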

View File

@ -58,7 +58,7 @@ class EventCount {
~EventCount() { ~EventCount() {
// Ensure there are no waiters. // Ensure there are no waiters.
eigen_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask); eigen_plain_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask);
} }
// Prewait prepares for waiting. // Prewait prepares for waiting.
@ -169,7 +169,8 @@ class EventCount {
class Waiter { class Waiter {
friend class EventCount; friend class EventCount;
// Align to 128 byte boundary to prevent false sharing with other Waiter objects in the same vector. // Align to 128 byte boundary to prevent false sharing with other Waiter
// objects in the same vector.
EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic<Waiter*> next; EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic<Waiter*> next;
std::mutex mu; std::mutex mu;
std::condition_variable cv; std::condition_variable cv;

View File

@ -10,7 +10,6 @@
#ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H #ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
#define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H #define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H
namespace Eigen { namespace Eigen {
template <typename Environment> template <typename Environment>
@ -23,7 +22,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
: ThreadPoolTempl(num_threads, true, env) {} : ThreadPoolTempl(num_threads, true, env) {}
ThreadPoolTempl(int num_threads, bool allow_spinning, ThreadPoolTempl(int num_threads, bool allow_spinning,
Environment env = Environment()) Environment env = Environment())
: env_(env), : env_(env),
num_threads_(num_threads), num_threads_(num_threads),
allow_spinning_(allow_spinning), allow_spinning_(allow_spinning),
@ -58,12 +57,18 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
coprimes_.push_back(i); coprimes_.push_back(i);
} }
} }
queues_.resize(num_threads_);
#ifndef EIGEN_THREAD_LOCAL
init_barrier_.reset(new Barrier(num_threads_));
#endif
for (int i = 0; i < num_threads_; i++) { for (int i = 0; i < num_threads_; i++) {
queues_.push_back(new Queue()); threads_.emplace_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
}
for (int i = 0; i < num_threads_; i++) {
threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); }));
} }
#ifndef EIGEN_THREAD_LOCAL
// Wait for workers to initialize per_thread_map_. Otherwise we might race
// with them in Schedule or CurrentThreadId.
init_barrier_->Wait();
#endif
} }
~ThreadPoolTempl() { ~ThreadPoolTempl() {
@ -78,13 +83,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
// Since we were cancelled, there might be entries in the queues. // Since we were cancelled, there might be entries in the queues.
// Empty them to prevent their destructor from asserting. // Empty them to prevent their destructor from asserting.
for (size_t i = 0; i < queues_.size(); i++) { for (size_t i = 0; i < queues_.size(); i++) {
queues_[i]->Flush(); queues_[i].Flush();
} }
} }
// Join threads explicitly to avoid destruction order issues. // Join threads explicitly to avoid destruction order issues.
for (size_t i = 0; i < num_threads_; i++) delete threads_[i]; threads_.resize(0);
for (size_t i = 0; i < num_threads_; i++) delete queues_[i]; queues_.resize(0);
} }
void Schedule(std::function<void()> fn) { void Schedule(std::function<void()> fn) {
@ -92,13 +97,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
PerThread* pt = GetPerThread(); PerThread* pt = GetPerThread();
if (pt->pool == this) { if (pt->pool == this) {
// Worker thread of this pool, push onto the thread's queue. // Worker thread of this pool, push onto the thread's queue.
Queue* q = queues_[pt->thread_id]; Queue& q = queues_[pt->thread_id];
t = q->PushFront(std::move(t)); t = q.PushFront(std::move(t));
} else { } else {
// A free-standing thread (or worker of another pool), push onto a random // A free-standing thread (or worker of another pool), push onto a random
// queue. // queue.
Queue* q = queues_[Rand(&pt->rand) % queues_.size()]; Queue& q = queues_[Rand(&pt->rand) % queues_.size()];
t = q->PushBack(std::move(t)); t = q.PushBack(std::move(t));
} }
// Note: below we touch this after making w available to worker threads. // Note: below we touch this after making w available to worker threads.
// Strictly speaking, this can lead to a racy-use-after-free. Consider that // Strictly speaking, this can lead to a racy-use-after-free. Consider that
@ -109,8 +114,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
// this is kept alive while any threads can potentially be in Schedule. // this is kept alive while any threads can potentially be in Schedule.
if (!t.f) { if (!t.f) {
ec_.Notify(false); ec_.Notify(false);
} } else {
else {
env_.ExecuteTask(t); // Push failed, execute directly. env_.ExecuteTask(t); // Push failed, execute directly.
} }
} }
@ -130,13 +134,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
ec_.Notify(true); ec_.Notify(true);
} }
int NumThreads() const final { int NumThreads() const final { return num_threads_; }
return num_threads_;
}
int CurrentThreadId() const final { int CurrentThreadId() const final {
const PerThread* pt = const PerThread* pt = const_cast<ThreadPoolTempl*>(this)->GetPerThread();
const_cast<ThreadPoolTempl*>(this)->GetPerThread();
if (pt->pool == this) { if (pt->pool == this) {
return pt->thread_id; return pt->thread_id;
} else { } else {
@ -148,17 +149,21 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
typedef typename Environment::EnvThread Thread; typedef typename Environment::EnvThread Thread;
struct PerThread { struct PerThread {
constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) { } constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) {}
ThreadPoolTempl* pool; // Parent pool, or null for normal threads. ThreadPoolTempl* pool; // Parent pool, or null for normal threads.
uint64_t rand; // Random generator state. uint64_t rand; // Random generator state.
int thread_id; // Worker thread index in pool. int thread_id; // Worker thread index in pool.
#ifndef EIGEN_THREAD_LOCAL
// Prevent false sharing.
char pad_[128];
#endif
}; };
Environment env_; Environment env_;
const int num_threads_; const int num_threads_;
const bool allow_spinning_; const bool allow_spinning_;
MaxSizeVector<Thread*> threads_; MaxSizeVector<std::unique_ptr<Thread> > threads_;
MaxSizeVector<Queue*> queues_; MaxSizeVector<Queue> queues_;
MaxSizeVector<unsigned> coprimes_; MaxSizeVector<unsigned> coprimes_;
MaxSizeVector<EventCount::Waiter> waiters_; MaxSizeVector<EventCount::Waiter> waiters_;
std::atomic<unsigned> blocked_; std::atomic<unsigned> blocked_;
@ -166,14 +171,27 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
std::atomic<bool> done_; std::atomic<bool> done_;
std::atomic<bool> cancelled_; std::atomic<bool> cancelled_;
EventCount ec_; EventCount ec_;
#ifndef EIGEN_THREAD_LOCAL
std::unique_ptr<Barrier> init_barrier_;
std::mutex per_thread_map_mutex_; // Protects per_thread_map_.
std::unordered_map<uint64_t, std::unique_ptr<PerThread>> per_thread_map_;
#endif
// Main worker thread loop. // Main worker thread loop.
void WorkerLoop(int thread_id) { void WorkerLoop(int thread_id) {
#ifndef EIGEN_THREAD_LOCAL
std::unique_ptr<PerThread> new_pt(new PerThread());
per_thread_map_mutex_.lock();
eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second);
per_thread_map_mutex_.unlock();
init_barrier_->Notify();
init_barrier_->Wait();
#endif
PerThread* pt = GetPerThread(); PerThread* pt = GetPerThread();
pt->pool = this; pt->pool = this;
pt->rand = std::hash<std::thread::id>()(std::this_thread::get_id()); pt->rand = GlobalThreadIdHash();
pt->thread_id = thread_id; pt->thread_id = thread_id;
Queue* q = queues_[thread_id]; Queue& q = queues_[thread_id];
EventCount::Waiter* waiter = &waiters_[thread_id]; EventCount::Waiter* waiter = &waiters_[thread_id];
// TODO(dvyukov,rmlarsen): The time spent in Steal() is proportional // TODO(dvyukov,rmlarsen): The time spent in Steal() is proportional
// to num_threads_ and we assume that new work is scheduled at a // to num_threads_ and we assume that new work is scheduled at a
@ -189,10 +207,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
// counter-productive for the types of I/O workloads the single thread // counter-productive for the types of I/O workloads the single thread
// pools tend to be used for. // pools tend to be used for.
while (!cancelled_) { while (!cancelled_) {
Task t = q->PopFront(); Task t = q.PopFront();
for (int i = 0; i < spin_count && !t.f; i++) { for (int i = 0; i < spin_count && !t.f; i++) {
if (!cancelled_.load(std::memory_order_relaxed)) { if (!cancelled_.load(std::memory_order_relaxed)) {
t = q->PopFront(); t = q.PopFront();
} }
} }
if (!t.f) { if (!t.f) {
@ -206,7 +224,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
} }
} else { } else {
while (!cancelled_) { while (!cancelled_) {
Task t = q->PopFront(); Task t = q.PopFront();
if (!t.f) { if (!t.f) {
t = Steal(); t = Steal();
if (!t.f) { if (!t.f) {
@ -243,7 +261,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
unsigned inc = coprimes_[r % coprimes_.size()]; unsigned inc = coprimes_[r % coprimes_.size()];
unsigned victim = r % size; unsigned victim = r % size;
for (unsigned i = 0; i < size; i++) { for (unsigned i = 0; i < size; i++) {
Task t = queues_[victim]->PopBack(); Task t = queues_[victim].PopBack();
if (t.f) { if (t.f) {
return t; return t;
} }
@ -270,7 +288,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
if (cancelled_) { if (cancelled_) {
return false; return false;
} else { } else {
*t = queues_[victim]->PopBack(); *t = queues_[victim].PopBack();
return true; return true;
} }
} }
@ -278,7 +296,8 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
// If we are shutting down and all worker threads are blocked without work, // If we are shutting down and all worker threads are blocked without work,
// then we are done. // then we are done.
blocked_++; blocked_++;
if (done_ && blocked_ == num_threads_) { // TODO is blocked_ required to be unsigned?
if (done_ && blocked_ == static_cast<unsigned>(num_threads_)) {
ec_.CancelWait(waiter); ec_.CancelWait(waiter);
// Almost done, but need to re-check queues. // Almost done, but need to re-check queues.
// Consider that all queues are empty and all worker threads are preempted // Consider that all queues are empty and all worker threads are preempted
@ -311,7 +330,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
unsigned inc = coprimes_[r % coprimes_.size()]; unsigned inc = coprimes_[r % coprimes_.size()];
unsigned victim = r % size; unsigned victim = r % size;
for (unsigned i = 0; i < size; i++) { for (unsigned i = 0; i < size; i++) {
if (!queues_[victim]->Empty()) { if (!queues_[victim].Empty()) {
return victim; return victim;
} }
victim += inc; victim += inc;
@ -322,10 +341,24 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
return -1; return -1;
} }
static EIGEN_STRONG_INLINE PerThread* GetPerThread() { static EIGEN_STRONG_INLINE uint64_t GlobalThreadIdHash() {
return std::hash<std::thread::id>()(std::this_thread::get_id());
}
EIGEN_STRONG_INLINE PerThread* GetPerThread() {
#ifndef EIGEN_THREAD_LOCAL
static PerThread dummy;
auto it = per_thread_map_.find(GlobalThreadIdHash());
if (it == per_thread_map_.end()) {
return &dummy;
} else {
return it->second.get();
}
#else
EIGEN_THREAD_LOCAL PerThread per_thread_; EIGEN_THREAD_LOCAL PerThread per_thread_;
PerThread* pt = &per_thread_; PerThread* pt = &per_thread_;
return pt; return pt;
#endif
} }
static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) { static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) {
@ -333,7 +366,8 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
// Update the internal state // Update the internal state
*state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
// Generate the random output (using the PCG-XSH-RS scheme) // Generate the random output (using the PCG-XSH-RS scheme)
return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); return static_cast<unsigned>((current ^ (current >> 22)) >>
(22 + (current >> 61)));
} }
}; };
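Steal() and NonEmptyQueueIndex() above start at a random victim queue and then step through queues_ with an increment taken from coprimes_; because the increment is coprime with the queue count, one sweep visits every queue exactly once. A standalone sketch of that property (not the pool's code; the assert only demonstrates the invariant):
#include <cassert>
#include <vector>
// Visits every index in [0, size) exactly once when 0 < inc <= size and
// gcd(inc, size) == 1, mirroring the victim traversal in Steal() above.
void visit_all_queues(unsigned size, unsigned start, unsigned inc) {
  std::vector<bool> seen(size, false);
  unsigned victim = start % size;
  for (unsigned i = 0; i < size; i++) {
    assert(!seen[victim]);   // never revisits a queue within one sweep
    seen[victim] = true;
    victim += inc;
    if (victim >= size) victim -= size;
  }
}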

View File

@ -10,7 +10,6 @@
#ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ #ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
#define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ #define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_
namespace Eigen { namespace Eigen {
// RunQueue is a fixed-size, partially non-blocking deque of Work items. // RunQueue is a fixed-size, partially non-blocking deque of Work items.
@ -47,7 +46,7 @@ class RunQueue {
array_[i].state.store(kEmpty, std::memory_order_relaxed); array_[i].state.store(kEmpty, std::memory_order_relaxed);
} }
~RunQueue() { eigen_assert(Size() == 0); } ~RunQueue() { eigen_plain_assert(Size() == 0); }
// PushFront inserts w at the beginning of the queue. // PushFront inserts w at the beginning of the queue.
// If queue is full returns w, otherwise returns default-constructed Work. // If queue is full returns w, otherwise returns default-constructed Work.
@ -131,9 +130,8 @@ class RunQueue {
Elem* e = &array_[mid & kMask]; Elem* e = &array_[mid & kMask];
uint8_t s = e->state.load(std::memory_order_relaxed); uint8_t s = e->state.load(std::memory_order_relaxed);
if (n == 0) { if (n == 0) {
if (s != kReady || if (s != kReady || !e->state.compare_exchange_strong(
!e->state.compare_exchange_strong(s, kBusy, s, kBusy, std::memory_order_acquire))
std::memory_order_acquire))
continue; continue;
start = mid; start = mid;
} else { } else {

View File

@ -10,13 +10,45 @@
#ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H #ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
#define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H #define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
// Try to come up with a portable implementation of thread local variables #if EIGEN_MAX_CPP_VER >= 11 && \
#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7) ((EIGEN_COMP_GNUC && EIGEN_GNUC_AT_LEAST(4, 8)) || \
#define EIGEN_THREAD_LOCAL static __thread __has_feature(cxx_thread_local))
#elif EIGEN_COMP_CLANG
#define EIGEN_THREAD_LOCAL static __thread
#else
#define EIGEN_THREAD_LOCAL static thread_local #define EIGEN_THREAD_LOCAL static thread_local
#endif #endif
// Disable TLS for Apple and Android builds with older toolchains.
#if defined(__APPLE__)
// Included for TARGET_OS_IPHONE, __IPHONE_OS_VERSION_MIN_REQUIRED,
// __IPHONE_8_0.
#include <Availability.h>
#include <TargetConditionals.h>
#endif
// Checks whether C++11's `thread_local` storage duration specifier is
// supported.
#if defined(__apple_build_version__) && \
((__apple_build_version__ < 8000042) || \
(TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0))
// Notes: Xcode's clang did not support `thread_local` until version
// 8, and even then not for all iOS < 9.0.
#undef EIGEN_THREAD_LOCAL
#elif defined(__ANDROID__) && EIGEN_COMP_CLANG
// There are platforms for which TLS should not be used even though the compiler
// makes it seem like it's supported (Android NDK < r12b for example).
// This is primarily because of linker problems and toolchain misconfiguration:
// TLS isn't supported until NDK r12b per
// https://developer.android.com/ndk/downloads/revision_history.html
// Since NDK r16, `__NDK_MAJOR__` and `__NDK_MINOR__` are defined in
// <android/ndk-version.h>. For NDK < r16, users should define these macros,
// e.g. `-D__NDK_MAJOR__=11 -D__NDK_MINOR__=0` for NDK r11.
#if __has_include(<android/ndk-version.h>)
#include <android/ndk-version.h>
#endif // __has_include(<android/ndk-version.h>)
#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && \
defined(__NDK_MINOR__) && \
((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1)))
#undef EIGEN_THREAD_LOCAL
#endif
#endif // defined(__ANDROID__) && defined(__clang__)
#endif // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H #endif // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
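When the checks above leave EIGEN_THREAD_LOCAL defined, per-thread state can be declared directly; when they undefine it, callers such as the thread pool above fall back to an explicit map keyed by a thread-id hash. A hedged sketch of the first pattern (the function and counter are invented for illustration):
// Illustration only.
int next_per_thread_value() {
#ifdef EIGEN_THREAD_LOCAL
  EIGEN_THREAD_LOCAL int counter = 0;   // expands to: static thread_local int counter = 0;
  return counter++;
#else
  // Without TLS, per-thread storage has to come from somewhere else, e.g. a
  // mutex-protected map keyed by a thread-id hash, as WorkerLoop does above.
  return -1;                            // placeholder for the fallback path
#endif
}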

View File

@ -25,6 +25,11 @@ template <typename T, size_t n> class array {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE T& at(size_t index) { eigen_assert(index < size()); return values[index]; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& at(size_t index) const { eigen_assert(index < size()); return values[index]; }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE T& front() { return values[0]; } EIGEN_STRONG_INLINE T& front() { return values[0]; }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -202,16 +207,16 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) {
} }
template<class T, std::size_t N> struct array_size<array<T,N> > { template<class T, std::size_t N> struct array_size<array<T,N> > {
static const size_t value = N; enum { value = N };
}; };
template<class T, std::size_t N> struct array_size<array<T,N>& > { template<class T, std::size_t N> struct array_size<array<T,N>& > {
static const size_t value = N; enum { value = N };
}; };
template<class T, std::size_t N> struct array_size<const array<T,N> > { template<class T, std::size_t N> struct array_size<const array<T,N> > {
static const size_t value = N; enum { value = N };
}; };
template<class T, std::size_t N> struct array_size<const array<T,N>& > { template<class T, std::size_t N> struct array_size<const array<T,N>& > {
static const size_t value = N; enum { value = N };
}; };
} // end namespace internal } // end namespace internal

View File

@ -35,7 +35,6 @@ class MaxSizeVector {
explicit MaxSizeVector(size_t n) explicit MaxSizeVector(size_t n)
: reserve_(n), size_(0), : reserve_(n), size_(0),
data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; }
} }
// Construct a new MaxSizeVector, reserve and resize to n. // Construct a new MaxSizeVector, reserve and resize to n.
@ -44,35 +43,55 @@ class MaxSizeVector {
MaxSizeVector(size_t n, const T& init) MaxSizeVector(size_t n, const T& init)
: reserve_(n), size_(n), : reserve_(n), size_(n),
data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) {
for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); } size_t i = 0;
EIGEN_TRY
{
for(; i < size_; ++i) { new (&data_[i]) T(init); }
}
EIGEN_CATCH(...)
{
// Construction failed, destruct in reverse order:
for(; (i+1) > 0; --i) { data_[i-1].~T(); }
internal::aligned_free(data_);
EIGEN_THROW;
}
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
~MaxSizeVector() { ~MaxSizeVector() {
for (size_t i = 0; i < size_; ++i) { for (size_t i = size_; i > 0; --i) {
data_[i].~T(); data_[i-1].~T();
} }
internal::aligned_free(data_); internal::aligned_free(data_);
} }
void resize(size_t n) { void resize(size_t n) {
eigen_assert(n <= reserve_); eigen_assert(n <= reserve_);
for (size_t i = size_; i < n; ++i) { for (; size_ < n; ++size_) {
new (&data_[i]) T; new (&data_[size_]) T;
} }
for (size_t i = n; i < size_; ++i) { for (; size_ > n; --size_) {
data_[i].~T(); data_[size_-1].~T();
} }
size_ = n; eigen_assert(size_ == n);
} }
// Append new elements (up to reserved size). // Append new elements (up to reserved size).
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void push_back(const T& t) { void push_back(const T& t) {
eigen_assert(size_ < reserve_); eigen_assert(size_ < reserve_);
data_[size_++] = t; new (&data_[size_++]) T(t);
} }
// For C++03 compatibility this only takes one argument
template<class X>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void emplace_back(const X& x) {
eigen_assert(size_ < reserve_);
new (&data_[size_++]) T(x);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const T& operator[] (size_t i) const { const T& operator[] (size_t i) const {
eigen_assert(i < size_); eigen_assert(i < size_);
@ -99,11 +118,8 @@ class MaxSizeVector {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void pop_back() { void pop_back() {
// NOTE: This does not destroy the value at the end the way
// std::vector's version of pop_back() does. That happens when
// the Vector is destroyed.
eigen_assert(size_ > 0); eigen_assert(size_ > 0);
size_--; data_[--size_].~T();
} }
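With the changes above, MaxSizeVector only reserves storage on construction, placement-news elements in push_back/emplace_back, and destroys them eagerly in pop_back and in reverse order in the destructor. A brief usage sketch under those semantics (the include path is an assumption):
#include <unsupported/Eigen/CXX11/Tensor>  // assumed include that pulls in MaxSizeVector
void max_size_vector_demo() {
  Eigen::MaxSizeVector<int> v(4);  // reserves room for 4 ints; nothing is constructed yet
  v.push_back(1);                  // constructs the element in place
  v.emplace_back(2);               // single-argument emplace, kept C++03-compatible
  v.pop_back();                    // now destroys the last element immediately
}                                  // remaining elements destroyed in reverse order, buffer freed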
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE

View File

@ -289,6 +289,7 @@ class FFT
void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1) void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1)
{ {
typedef typename ComplexDerived::Scalar src_type; typedef typename ComplexDerived::Scalar src_type;
typedef typename ComplexDerived::RealScalar real_type;
typedef typename OutputDerived::Scalar dst_type; typedef typename OutputDerived::Scalar dst_type;
const bool realfft= (NumTraits<dst_type>::IsComplex == 0); const bool realfft= (NumTraits<dst_type>::IsComplex == 0);
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
@ -329,9 +330,9 @@ class FFT
tmp.head(nhead) = src.head(nhead); tmp.head(nhead) = src.head(nhead);
tmp.tail(ntail) = src.tail(ntail); tmp.tail(ntail) = src.tail(ntail);
if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it
tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*src_type(.5); tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*real_type(.5);
}else{ // expanding -- split the old Nyquist bin into two halves }else{ // expanding -- split the old Nyquist bin into two halves
tmp(nhead) = src(nhead) * src_type(.5); tmp(nhead) = src(nhead) * real_type(.5);
tmp(tmp.size()-nhead) = tmp(nhead); tmp(tmp.size()-nhead) = tmp(nhead);
} }
} }
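For concreteness, a tiny worked example of the Nyquist handling above, with invented sizes:
// Shrinking a length-8 spectrum to nfft == 6: the new Nyquist bin becomes
//   tmp(nhead) = ( src(3) + src(5) ) * real_type(0.5)
// i.e. the two old bins that fold onto the new Nyquist frequency are averaged,
// and the 0.5 factor is a RealScalar so scaling a complex bin stays well-typed.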

View File

@ -184,7 +184,7 @@ inline void glRotate(const Rotation2D<float>& rot)
} }
inline void glRotate(const Rotation2D<double>& rot) inline void glRotate(const Rotation2D<double>& rot)
{ {
glRotated(rot.angle()*180.0/EIGEN_PI, 0.0, 0.0, 1.0); glRotated(rot.angle()*180.0/double(EIGEN_PI), 0.0, 0.0, 1.0);
} }
template<typename Derived> void glRotate(const RotationBase<Derived,3>& rot) template<typename Derived> void glRotate(const RotationBase<Derived,3>& rot)

View File

@ -35,6 +35,7 @@ struct get_boxes_helper {
{ {
outBoxes.insert(outBoxes.end(), boxBegin, boxEnd); outBoxes.insert(outBoxes.end(), boxBegin, boxEnd);
eigen_assert(outBoxes.size() == objects.size()); eigen_assert(outBoxes.size() == objects.size());
EIGEN_ONLY_USED_FOR_DEBUG(objects);
} }
}; };

View File

@ -249,8 +249,6 @@ namespace Eigen
DenseIndex degree, DenseIndex degree,
const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots) const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots)
{ {
typedef typename Spline<_Scalar, _Dim, _Degree>::BasisVectorType BasisVectorType;
const DenseIndex p = degree; const DenseIndex p = degree;
const DenseIndex i = Spline::Span(u, degree, knots); const DenseIndex i = Spline::Span(u, degree, knots);
@ -380,9 +378,6 @@ namespace Eigen
typedef Spline<_Scalar, _Dim, _Degree> SplineType; typedef Spline<_Scalar, _Dim, _Degree> SplineType;
enum { Order = SplineTraits<SplineType>::OrderAtCompileTime }; enum { Order = SplineTraits<SplineType>::OrderAtCompileTime };
typedef typename SplineTraits<SplineType>::Scalar Scalar;
typedef typename SplineTraits<SplineType>::BasisVectorType BasisVectorType;
const DenseIndex span = SplineType::Span(u, p, U); const DenseIndex span = SplineType::Span(u, p, U);
const DenseIndex n = (std::min)(p, order); const DenseIndex n = (std::min)(p, order);

View File

@ -197,6 +197,7 @@ template<typename Scalar> void check_singular_cases(const Scalar& singularBeta)
template<typename Scalar> void eulerangles_manual() template<typename Scalar> void eulerangles_manual()
{ {
typedef Matrix<Scalar,3,1> Vector3; typedef Matrix<Scalar,3,1> Vector3;
typedef Matrix<Scalar,Dynamic,1> VectorX;
const Vector3 Zero = Vector3::Zero(); const Vector3 Zero = Vector3::Zero();
const Scalar PI = Scalar(EIGEN_PI); const Scalar PI = Scalar(EIGEN_PI);
@ -213,13 +214,13 @@ template<typename Scalar> void eulerangles_manual()
check_singular_cases(-PI); check_singular_cases(-PI);
// non-singular cases // non-singular cases
VectorXd alpha = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); VectorX alpha = VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
VectorXd beta = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI); VectorX beta = VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI);
VectorXd gamma = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); VectorX gamma = VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
for (int i = 0; i < alpha.size(); ++i) { for (int i = 0; i < alpha.size(); ++i) {
for (int j = 0; j < beta.size(); ++j) { for (int j = 0; j < beta.size(); ++j) {
for (int k = 0; k < gamma.size(); ++k) { for (int k = 0; k < gamma.size(); ++k) {
check_all_var(Vector3d(alpha(i), beta(j), gamma(k))); check_all_var(Vector3(alpha(i), beta(j), gamma(k)));
} }
} }
} }

View File

@ -10,6 +10,7 @@
#include "main.h" #include "main.h"
#include <algorithm>
#include <set> #include <set>
#include <Eigen/CXX11/Tensor> #include <Eigen/CXX11/Tensor>
@ -19,22 +20,21 @@ using Eigen::Index;
using Eigen::RowMajor; using Eigen::RowMajor;
using Eigen::ColMajor; using Eigen::ColMajor;
using internal::TensorBlockShapeType;
template<typename T> template<typename T>
static const T& choose(int layout, const T& col, const T& row) { static const T& choose(int layout, const T& col, const T& row) {
return layout == ColMajor ? col : row; return layout == ColMajor ? col : row;
} }
static const TensorBlockShapeType RandomShape() { static internal::TensorBlockShapeType RandomShape() {
return internal::random<bool>() return internal::random<bool>()
? internal::TensorBlockShapeType::kUniformAllDims ? internal::kUniformAllDims
: internal::TensorBlockShapeType::kSkewedInnerDims; : internal::kSkewedInnerDims;
} }
template <int NumDims> template <int NumDims>
static std::size_t RandomTargetSize(const DSizes<Index, NumDims>& dims) { static Index RandomTargetSize(const DSizes<Index, NumDims>& dims) {
return internal::random<int>(1, dims.TotalSize()); return internal::random<Index>(1, dims.TotalSize());
} }
template <int NumDims> template <int NumDims>
@ -44,12 +44,12 @@ static DSizes<Index, NumDims> RandomDims() {
dims[i] = internal::random<int>(1, 20); dims[i] = internal::random<int>(1, 20);
} }
return DSizes<Index, NumDims>(dims); return DSizes<Index, NumDims>(dims);
}; }
/** Dummy data type to test TensorBlock copy ops. */ /** Dummy data type to test TensorBlock copy ops. */
struct Data { struct Data {
Data() : Data(0) {} Data() : value(0) {}
explicit Data(int v) { value = v; } explicit Data(int v) : value(v) { }
int value; int value;
}; };
@ -91,21 +91,19 @@ static void Debug(DSizes<Index, NumDims> dims) {
template <int Layout> template <int Layout>
static void test_block_mapper_sanity() static void test_block_mapper_sanity()
{ {
using T = int; typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
using TensorBlock = internal::TensorBlock<T, Index, 2, Layout>;
using TensorBlockMapper = internal::TensorBlockMapper<T, Index, 2, Layout>;
DSizes<Index, 2> tensor_dims(100, 100); DSizes<Index, 2> tensor_dims(100, 100);
// Test uniform blocks. // Test uniform blocks.
TensorBlockMapper uniform_block_mapper( TensorBlockMapper uniform_block_mapper(
tensor_dims, internal::TensorBlockShapeType::kUniformAllDims, 100); tensor_dims, internal::kUniformAllDims, 100);
VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100); VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100);
VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100); VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100);
// 10x10 blocks // 10x10 blocks
auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, nullptr); typename TensorBlockMapper::Block uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10); VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10);
VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10); VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10);
// Depending on the layout we stride by cols or rows. // Depending on the layout we stride by cols or rows.
@ -117,13 +115,13 @@ static void test_block_mapper_sanity()
// Test skewed to inner dims blocks. // Test skewed to inner dims blocks.
TensorBlockMapper skewed_block_mapper( TensorBlockMapper skewed_block_mapper(
tensor_dims, internal::TensorBlockShapeType::kSkewedInnerDims, 100); tensor_dims, internal::kSkewedInnerDims, 100);
VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100); VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100);
VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100); VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100);
// 1x100 (100x1) rows/cols depending on a tensor layout. // 1x100 (100x1) rows/cols depending on a tensor layout.
auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, nullptr); typename TensorBlockMapper::Block skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1)); VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1));
VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100)); VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100));
// Depending on the layout we stride by cols or rows. // Depending on the layout we stride by cols or rows.
@ -140,12 +138,13 @@ template <typename T, int Layout, int NumDims>
static void UpdateCoeffSet( static void UpdateCoeffSet(
const internal::TensorBlock<T, Index, NumDims, Layout>& block, const internal::TensorBlock<T, Index, NumDims, Layout>& block,
Index first_coeff_index, int dim_index, std::set<Index>* visited_coeffs) { Index first_coeff_index, int dim_index, std::set<Index>* visited_coeffs) {
const DSizes<Index, NumDims> block_sizes = block.block_sizes(); const DSizes<Index, NumDims>& block_sizes = block.block_sizes();
const DSizes<Index, NumDims> tensor_strides = block.tensor_strides(); const DSizes<Index, NumDims>& tensor_strides = block.tensor_strides();
for (int i = 0; i < block_sizes[dim_index]; ++i) { for (int i = 0; i < block_sizes[dim_index]; ++i) {
if (tensor_strides[dim_index] == 1) { if (tensor_strides[dim_index] == 1) {
auto inserted = visited_coeffs->insert(first_coeff_index + i); typedef std::pair<std::set<Index>::iterator, bool> ReturnType;
ReturnType inserted = visited_coeffs->insert(first_coeff_index + i);
VERIFY_IS_EQUAL(inserted.second, true); VERIFY_IS_EQUAL(inserted.second, true);
} else { } else {
int next_dim_index = dim_index + choose(Layout, -1, 1); int next_dim_index = dim_index + choose(Layout, -1, 1);
@ -158,9 +157,8 @@ static void UpdateCoeffSet(
template <typename T, int NumDims, int Layout> template <typename T, int NumDims, int Layout>
static void test_block_mapper_maps_every_element() { static void test_block_mapper_maps_every_element() {
using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>; typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
using TensorBlockMapper = typedef internal::TensorBlockMapper<T, Index, NumDims, Layout> TensorBlockMapper;
internal::TensorBlockMapper<T, Index, NumDims, Layout>;
DSizes<Index, NumDims> dims = RandomDims<NumDims>(); DSizes<Index, NumDims> dims = RandomDims<NumDims>();
@ -171,7 +169,7 @@ static void test_block_mapper_maps_every_element() {
TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims)); TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims));
for (int i = 0; i < block_mapper.total_block_count(); ++i) { for (int i = 0; i < block_mapper.total_block_count(); ++i) {
TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(), UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
choose(Layout, NumDims - 1, 0), choose(Layout, NumDims - 1, 0),
&coeff_set); &coeff_set);
@ -180,16 +178,15 @@ static void test_block_mapper_maps_every_element() {
// Verify that every coefficient in the original Tensor is accessible through // Verify that every coefficient in the original Tensor is accessible through
// TensorBlock only once. // TensorBlock only once.
Index total_coeffs = dims.TotalSize(); Index total_coeffs = dims.TotalSize();
VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs); VERIFY_IS_EQUAL(Index(coeff_set.size()), total_coeffs);
VERIFY_IS_EQUAL(*coeff_set.begin(), 0); VERIFY_IS_EQUAL(*coeff_set.begin(), 0);
VERIFY_IS_EQUAL(*coeff_set.rbegin(), total_coeffs - 1); VERIFY_IS_EQUAL(*coeff_set.rbegin(), total_coeffs - 1);
} }
template <typename T, int NumDims, int Layout> template <typename T, int NumDims, int Layout>
static void test_slice_block_mapper_maps_every_element() { static void test_slice_block_mapper_maps_every_element() {
using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>; typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock;
using TensorSliceBlockMapper = typedef internal::TensorSliceBlockMapper<T, Index, NumDims, Layout> TensorSliceBlockMapper;
internal::TensorSliceBlockMapper<T, Index, NumDims, Layout>;
DSizes<Index, NumDims> tensor_dims = RandomDims<NumDims>(); DSizes<Index, NumDims> tensor_dims = RandomDims<NumDims>();
DSizes<Index, NumDims> tensor_slice_offsets = RandomDims<NumDims>(); DSizes<Index, NumDims> tensor_slice_offsets = RandomDims<NumDims>();
@ -206,12 +203,12 @@ static void test_slice_block_mapper_maps_every_element() {
// Keep track of element indices available via block access. // Keep track of element indices available via block access.
std::set<Index> coeff_set; std::set<Index> coeff_set;
auto total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize()); int total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize());
// Pick random dimension sizes for the tensor blocks. // Pick random dimension sizes for the tensor blocks.
DSizes<Index, NumDims> block_sizes; DSizes<Index, NumDims> block_sizes;
for (int i = 0; i < NumDims; ++i) { for (int i = 0; i < NumDims; ++i) {
block_sizes[i] = internal::random<int>(1, tensor_slice_extents[i]); block_sizes[i] = internal::random<Index>(1, tensor_slice_extents[i]);
} }
TensorSliceBlockMapper block_mapper(tensor_dims, tensor_slice_offsets, TensorSliceBlockMapper block_mapper(tensor_dims, tensor_slice_offsets,
@ -219,13 +216,13 @@ static void test_slice_block_mapper_maps_every_element() {
DimensionList<Index, NumDims>()); DimensionList<Index, NumDims>());
for (int i = 0; i < block_mapper.total_block_count(); ++i) { for (int i = 0; i < block_mapper.total_block_count(); ++i) {
TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(i, NULL);
UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(), UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(),
choose(Layout, NumDims - 1, 0), choose(Layout, NumDims - 1, 0),
&coeff_set); &coeff_set);
} }
VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs); VERIFY_IS_EQUAL(Index(coeff_set.size()), total_coeffs);
} }
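The slice variant checks the same coverage property, restricted to the coefficients selected by the per-dimension offsets and extents. A small two-dimensional sketch of how a slice's coordinates map to linear indices in the full tensor (column-major strides; the sizes are made up for illustration):

#include <cassert>
#include <cstddef>
#include <set>

int main() {
  const std::ptrdiff_t dims[2]    = {7, 5};          // full tensor
  const std::ptrdiff_t offset[2]  = {2, 1};          // slice start
  const std::ptrdiff_t extent[2]  = {3, 4};          // slice size
  const std::ptrdiff_t strides[2] = {1, dims[0]};    // column-major strides

  // Enumerate every coefficient of the slice by its linear index in the
  // full tensor; distinct coordinates must give distinct indices.
  std::set<std::ptrdiff_t> coeff_set;
  for (std::ptrdiff_t j = 0; j < extent[1]; ++j)
    for (std::ptrdiff_t i = 0; i < extent[0]; ++i)
      coeff_set.insert((offset[0] + i) * strides[0] + (offset[1] + j) * strides[1]);

  assert(static_cast<std::ptrdiff_t>(coeff_set.size()) == extent[0] * extent[1]);
}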
template <typename T, int NumDims, int Layout> template <typename T, int NumDims, int Layout>
@ -240,7 +237,7 @@ static void test_block_io_copy_data_from_source_to_target() {
TensorBlockWriter; TensorBlockWriter;
DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>(); DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>();
const auto input_tensor_size = input_tensor_dims.TotalSize(); const Index input_tensor_size = input_tensor_dims.TotalSize();
T* input_data = GenerateRandomData<T>(input_tensor_size); T* input_data = GenerateRandomData<T>(input_tensor_size);
T* output_data = new T[input_tensor_size]; T* output_data = new T[input_tensor_size];
@ -265,14 +262,14 @@ static void test_block_io_copy_data_from_source_to_target() {
} }
template <int Layout, int NumDims> template <int Layout, int NumDims>
static int GetInputIndex(Index output_index, static Index GetInputIndex(Index output_index,
const array<Index, NumDims>& output_to_input_dim_map, const array<Index, NumDims>& output_to_input_dim_map,
const array<Index, NumDims>& input_strides, const array<Index, NumDims>& input_strides,
const array<Index, NumDims>& output_strides) { const array<Index, NumDims>& output_strides) {
int input_index = 0; int input_index = 0;
if (Layout == ColMajor) { if (Layout == ColMajor) {
for (int i = NumDims - 1; i > 0; --i) { for (int i = NumDims - 1; i > 0; --i) {
const int idx = output_index / output_strides[i]; const Index idx = output_index / output_strides[i];
input_index += idx * input_strides[output_to_input_dim_map[i]]; input_index += idx * input_strides[output_to_input_dim_map[i]];
output_index -= idx * output_strides[i]; output_index -= idx * output_strides[i];
} }
@ -280,7 +277,7 @@ static int GetInputIndex(Index output_index,
output_index * input_strides[output_to_input_dim_map[0]]; output_index * input_strides[output_to_input_dim_map[0]];
} else { } else {
for (int i = 0; i < NumDims - 1; ++i) { for (int i = 0; i < NumDims - 1; ++i) {
const int idx = output_index / output_strides[i]; const Index idx = output_index / output_strides[i];
input_index += idx * input_strides[output_to_input_dim_map[i]]; input_index += idx * input_strides[output_to_input_dim_map[i]];
output_index -= idx * output_strides[i]; output_index -= idx * output_strides[i];
} }
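GetInputIndex above peels one dimension at a time off the output linear index and rebuilds the corresponding index in the input through the dimension map. A self-contained restatement of the column-major branch (hypothetical name GetInputIndexColMajor, std types instead of the test's Index and array aliases), checked on a concrete transposed 2x3 example:

#include <array>
#include <cassert>
#include <cstddef>

// Decompose an output linear index into per-dimension coordinates, then
// rebuild a linear index in the input tensor whose dimensions are a
// permutation of the output's (column-major order).
template <std::size_t NumDims>
std::ptrdiff_t GetInputIndexColMajor(
    std::ptrdiff_t output_index,
    const std::array<std::ptrdiff_t, NumDims>& output_to_input_dim_map,
    const std::array<std::ptrdiff_t, NumDims>& input_strides,
    const std::array<std::ptrdiff_t, NumDims>& output_strides) {
  std::ptrdiff_t input_index = 0;
  for (std::size_t i = NumDims - 1; i > 0; --i) {
    const std::ptrdiff_t idx = output_index / output_strides[i];
    input_index += idx * input_strides[output_to_input_dim_map[i]];
    output_index -= idx * output_strides[i];
  }
  return input_index + output_index * input_strides[output_to_input_dim_map[0]];
}

int main() {
  // A 2x3 input (strides {1, 2}) viewed as a 3x2 output (strides {1, 3}) with
  // the dimensions swapped: output dim 0 -> input dim 1, output dim 1 -> input dim 0.
  const std::array<std::ptrdiff_t, 2> dim_map{{1, 0}}, in_strides{{1, 2}}, out_strides{{1, 3}};
  // Output coordinate (2, 1) has linear index 2*1 + 1*3 = 5 and must map to
  // input coordinate (1, 2), i.e. linear index 1*1 + 2*2 = 5.
  assert(GetInputIndexColMajor<2>(5, dim_map, in_strides, out_strides) == 5);
}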
@ -319,7 +316,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
TensorBlockWriter; TensorBlockWriter;
DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>(); DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>();
const auto input_tensor_size = input_tensor_dims.TotalSize(); const Index input_tensor_size = input_tensor_dims.TotalSize();
// Create a random input tensor. // Create a random input tensor.
T* input_data = GenerateRandomData<T>(input_tensor_size); T* input_data = GenerateRandomData<T>(input_tensor_size);
@ -327,7 +324,7 @@ static void test_block_io_copy_using_reordered_dimensions() {
// Create a random dimension re-ordering/shuffle. // Create a random dimension re-ordering/shuffle.
std::vector<Index> shuffle; std::vector<Index> shuffle;
for (int i = 0; i < NumDims; ++i) shuffle.push_back(i); for (int i = 0; i < NumDims; ++i) shuffle.push_back(i);
std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937()); std::random_shuffle(shuffle.begin(), shuffle.end());
DSizes<Index, NumDims> output_tensor_dims; DSizes<Index, NumDims> output_tensor_dims;
array<Index, NumDims> input_to_output_dim_map; array<Index, NumDims> input_to_output_dim_map;
@ -342,8 +339,8 @@ static void test_block_io_copy_using_reordered_dimensions() {
TensorBlockMapper block_mapper(output_tensor_dims, RandomShape(), TensorBlockMapper block_mapper(output_tensor_dims, RandomShape(),
RandomTargetSize(input_tensor_dims)); RandomTargetSize(input_tensor_dims));
auto* block_data = new T[block_mapper.block_dims_total_size()]; T* block_data = new T[block_mapper.block_dims_total_size()];
auto* output_data = new T[input_tensor_size]; T* output_data = new T[input_tensor_size];
array<Index, NumDims> input_tensor_strides = array<Index, NumDims> input_tensor_strides =
ComputeStrides<Layout, NumDims>(input_tensor_dims); ComputeStrides<Layout, NumDims>(input_tensor_dims);
@ -370,6 +367,40 @@ static void test_block_io_copy_using_reordered_dimensions() {
delete[] output_data; delete[] output_data;
} }
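Both copy tests above address raw buffers through per-dimension strides derived from the tensor dimensions. For reference, a minimal column-major stride computation consistent with that indexing (a hypothetical helper, not Eigen's internal one):

#include <array>
#include <cassert>
#include <cstddef>

// Column-major strides: stride[0] = 1 and stride[i] = stride[i-1] * dims[i-1],
// so the inner-most dimension is contiguous in memory.
template <std::size_t NumDims>
std::array<std::ptrdiff_t, NumDims> ComputeColMajorStrides(
    const std::array<std::ptrdiff_t, NumDims>& dims) {
  std::array<std::ptrdiff_t, NumDims> strides;
  strides[0] = 1;
  for (std::size_t i = 1; i < NumDims; ++i) strides[i] = strides[i - 1] * dims[i - 1];
  return strides;
}

int main() {
  const std::array<std::ptrdiff_t, 3> dims{{4, 3, 2}};
  const std::array<std::ptrdiff_t, 3> s = ComputeColMajorStrides<3>(dims);
  assert(s[0] == 1 && s[1] == 4 && s[2] == 12);
}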
template<typename Scalar, typename StorageIndex, int Dim>
class EqualityChecker
{
const Scalar* input_data;
const DSizes<StorageIndex, Dim> &input_dims, &input_strides, &output_dims, &output_strides;
void check_recursive(const Scalar* input, const Scalar* output, int depth=0) const
{
if(depth==Dim)
{
VERIFY_IS_EQUAL(*input, *output);
return;
}
for(int i=0; i<output_dims[depth]; ++i)
{
check_recursive(input + i % input_dims[depth] * input_strides[depth], output + i*output_strides[depth], depth+1);
}
}
public:
EqualityChecker(const Scalar* input_data_,
const DSizes<StorageIndex, Dim> &input_dims_, const DSizes<StorageIndex, Dim> &input_strides_,
const DSizes<StorageIndex, Dim> &output_dims_, const DSizes<StorageIndex, Dim> &output_strides_)
: input_data(input_data_)
, input_dims(input_dims_), input_strides(input_strides_)
, output_dims(output_dims_), output_strides(output_strides_)
{}
void operator()(const Scalar* output_data) const
{
check_recursive(input_data, output_data);
}
};
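EqualityChecker walks the output coordinates recursively and reads the input at i % input_dims[depth], so input dimensions of size one are effectively broadcast against a larger output. A flat two-dimensional analogue of that modulo-indexing rule (plain C++, made-up sizes):

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // Input is logically {1, 3} and is broadcast along dimension 0 into a
  // {4, 3} output; both use column-major strides.
  const std::ptrdiff_t in_dims[2] = {1, 3},  in_strides[2] = {1, 1};
  const std::ptrdiff_t out_dims[2] = {4, 3}, out_strides[2] = {1, 4};

  const std::vector<float> input = {10.f, 20.f, 30.f};
  std::vector<float> output(12);
  for (std::ptrdiff_t j = 0; j < out_dims[1]; ++j)
    for (std::ptrdiff_t i = 0; i < out_dims[0]; ++i)
      output[i * out_strides[0] + j * out_strides[1]] =
          input[(i % in_dims[0]) * in_strides[0] + (j % in_dims[1]) * in_strides[1]];

  // Same rule the checker applies: the single input row is replicated across
  // all four output rows.
  for (std::ptrdiff_t j = 0; j < out_dims[1]; ++j)
    for (std::ptrdiff_t i = 0; i < out_dims[0]; ++i)
      assert(output[i + 4 * j] == input[j]);
}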
template <int Layout> template <int Layout>
static void test_block_io_zero_stride() static void test_block_io_zero_stride()
{ {
@ -385,8 +416,8 @@ static void test_block_io_zero_stride()
input_tensor_dims[0] = 1; input_tensor_dims[0] = 1;
input_tensor_dims[2] = 1; input_tensor_dims[2] = 1;
input_tensor_dims[4] = 1; input_tensor_dims[4] = 1;
const auto input_tensor_size = input_tensor_dims.TotalSize(); const Index input_tensor_size = input_tensor_dims.TotalSize();
auto* input_data = GenerateRandomData<float>(input_tensor_size); float* input_data = GenerateRandomData<float>(input_tensor_size);
DSizes<Index, 5> output_tensor_dims = rnd_dims; DSizes<Index, 5> output_tensor_dims = rnd_dims;
@ -401,33 +432,10 @@ static void test_block_io_zero_stride()
input_tensor_strides_with_zeros[4] = 0; input_tensor_strides_with_zeros[4] = 0;
// Verify that data was correctly read/written from/into the block. // Verify that data was correctly read/written from/into the block.
const auto verify_is_equal = [&](const float* output_data) { const EqualityChecker<float, Index, 5> verify_is_equal(input_data, input_tensor_dims, input_tensor_strides, output_tensor_dims, output_tensor_strides);
for (int i = 0; i < output_tensor_dims[0]; ++i) {
for (int j = 0; j < output_tensor_dims[1]; ++j) {
for (int k = 0; k < output_tensor_dims[2]; ++k) {
for (int l = 0; l < output_tensor_dims[3]; ++l) {
for (int m = 0; m < output_tensor_dims[4]; ++m) {
const Index output_offset =
i * output_tensor_strides[0] + j * output_tensor_strides[1] +
k * output_tensor_strides[2] + l * output_tensor_strides[3] +
m * output_tensor_strides[4];
const Index input_offset =
i % input_tensor_dims[0] * input_tensor_strides[0] +
j % input_tensor_dims[1] * input_tensor_strides[1] +
k % input_tensor_dims[2] * input_tensor_strides[2] +
l % input_tensor_dims[3] * input_tensor_strides[3] +
m % input_tensor_dims[4] * input_tensor_strides[4];
VERIFY_IS_EQUAL(output_data[output_offset],
input_data[input_offset]);
}
}
}
}
}
};
{ {
auto* output_data = new float[output_tensor_dims.TotalSize()]; float* output_data = new float[output_tensor_dims.TotalSize()];
TensorBlock read_block(0, output_tensor_dims, output_tensor_strides, TensorBlock read_block(0, output_tensor_dims, output_tensor_strides,
input_tensor_strides_with_zeros, output_data); input_tensor_strides_with_zeros, output_data);
TensorBlockReader::Run(&read_block, input_data); TensorBlockReader::Run(&read_block, input_data);
@ -436,7 +444,7 @@ static void test_block_io_zero_stride()
} }
{ {
auto* output_data = new float[output_tensor_dims.TotalSize()]; float* output_data = new float[output_tensor_dims.TotalSize()];
TensorBlock write_block(0, output_tensor_dims, TensorBlock write_block(0, output_tensor_dims,
input_tensor_strides_with_zeros, input_tensor_strides_with_zeros,
output_tensor_strides, input_data); output_tensor_strides, input_data);
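The zero-stride read above works because a stride of zero along a dimension makes every step along that dimension land on the same input element, which is how a size-one dimension gets replicated into a larger output. A minimal sketch of that mechanism, independent of the TensorBlock machinery:

#include <cassert>
#include <cstddef>

int main() {
  // Input is logically {1, 3}: one row, three columns (column-major).
  const float input[3] = {1.f, 2.f, 3.f};
  // Read it as a {4, 3} output by giving dimension 0 an input stride of zero:
  // advancing along dim 0 does not move in the input, so the row is repeated.
  const std::ptrdiff_t in_strides_with_zero[2] = {0, 1};
  float output[12];
  for (std::ptrdiff_t j = 0; j < 3; ++j)
    for (std::ptrdiff_t i = 0; i < 4; ++i)
      output[i + 4 * j] =
          input[i * in_strides_with_zero[0] + j * in_strides_with_zero[1]];
  for (std::ptrdiff_t j = 0; j < 3; ++j)
    for (std::ptrdiff_t i = 0; i < 4; ++i)
      assert(output[i + 4 * j] == input[j]);
}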
@ -459,14 +467,14 @@ static void test_block_io_squeeze_ones() {
// Total size > 1. // Total size > 1.
{ {
DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1); DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1);
const auto total_size = block_sizes.TotalSize(); const Index total_size = block_sizes.TotalSize();
// Create a random input tensor. // Create a random input tensor.
auto* input_data = GenerateRandomData<float>(total_size); float* input_data = GenerateRandomData<float>(total_size);
DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes)); DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
{ {
auto* output_data = new float[block_sizes.TotalSize()]; float* output_data = new float[block_sizes.TotalSize()];
TensorBlock read_block(0, block_sizes, strides, strides, output_data); TensorBlock read_block(0, block_sizes, strides, strides, output_data);
TensorBlockReader::Run(&read_block, input_data); TensorBlockReader::Run(&read_block, input_data);
for (int i = 0; i < total_size; ++i) { for (int i = 0; i < total_size; ++i) {
@ -476,7 +484,7 @@ static void test_block_io_squeeze_ones() {
} }
{ {
auto* output_data = new float[block_sizes.TotalSize()]; float* output_data = new float[block_sizes.TotalSize()];
TensorBlock write_block(0, block_sizes, strides, strides, input_data); TensorBlock write_block(0, block_sizes, strides, strides, input_data);
TensorBlockWriter::Run(write_block, output_data); TensorBlockWriter::Run(write_block, output_data);
for (int i = 0; i < total_size; ++i) { for (int i = 0; i < total_size; ++i) {
@ -489,14 +497,14 @@ static void test_block_io_squeeze_ones() {
// Total size == 1. // Total size == 1.
{ {
DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1); DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1);
const auto total_size = block_sizes.TotalSize(); const Index total_size = block_sizes.TotalSize();
// Create a random input tensor. // Create a random input tensor.
auto* input_data = GenerateRandomData<float>(total_size); float* input_data = GenerateRandomData<float>(total_size);
DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes)); DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
{ {
auto* output_data = new float[block_sizes.TotalSize()]; float* output_data = new float[block_sizes.TotalSize()];
TensorBlock read_block(0, block_sizes, strides, strides, output_data); TensorBlock read_block(0, block_sizes, strides, strides, output_data);
TensorBlockReader::Run(&read_block, input_data); TensorBlockReader::Run(&read_block, input_data);
for (int i = 0; i < total_size; ++i) { for (int i = 0; i < total_size; ++i) {
@ -506,7 +514,7 @@ static void test_block_io_squeeze_ones() {
} }
{ {
auto* output_data = new float[block_sizes.TotalSize()]; float* output_data = new float[block_sizes.TotalSize()];
TensorBlock write_block(0, block_sizes, strides, strides, input_data); TensorBlock write_block(0, block_sizes, strides, strides, input_data);
TensorBlockWriter::Run(write_block, output_data); TensorBlockWriter::Run(write_block, output_data);
for (int i = 0; i < total_size; ++i) { for (int i = 0; i < total_size; ++i) {
@ -635,7 +643,7 @@ static void test_block_cwise_binary_io_basic() {
DSizes<Index, NumDims> block_sizes = RandomDims<NumDims>(); DSizes<Index, NumDims> block_sizes = RandomDims<NumDims>();
DSizes<Index, NumDims> strides(ComputeStrides<Layout, NumDims>(block_sizes)); DSizes<Index, NumDims> strides(ComputeStrides<Layout, NumDims>(block_sizes));
const auto total_size = block_sizes.TotalSize(); const Index total_size = block_sizes.TotalSize();
// Create random input tensors. // Create random input tensors.
T* left_data = GenerateRandomData<T>(total_size); T* left_data = GenerateRandomData<T>(total_size);
@ -664,13 +672,13 @@ static void test_block_cwise_binary_io_squeeze_ones() {
DSizes<Index, 5> block_sizes(1, 2, 1, 3, 1); DSizes<Index, 5> block_sizes(1, 2, 1, 3, 1);
DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes)); DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes));
const auto total_size = block_sizes.TotalSize(); const Index total_size = block_sizes.TotalSize();
// Create random input tensors. // Create random input tensors.
auto* left_data = GenerateRandomData<float>(total_size); float* left_data = GenerateRandomData<float>(total_size);
auto* right_data = GenerateRandomData<float>(total_size); float* right_data = GenerateRandomData<float>(total_size);
auto* output_data = new float[total_size]; float* output_data = new float[total_size];
BinaryFunctor functor; BinaryFunctor functor;
TensorBlockCwiseBinaryIO::Run(functor, block_sizes, strides, output_data, TensorBlockCwiseBinaryIO::Run(functor, block_sizes, strides, output_data,
strides, left_data, strides, right_data); strides, left_data, strides, right_data);
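TensorBlockCwiseBinaryIO applies a binary functor element-wise, reading the two operands and writing the result through their respective strides. A flat sketch of the element-wise part only (hypothetical RunCwiseBinary; the real implementation also walks per-dimension strides, which this sketch sidesteps by assuming contiguous data):

#include <cassert>
#include <cstddef>
#include <functional>
#include <vector>

// Apply a binary functor element-wise over two equally sized contiguous blocks.
template <typename Functor, typename T>
void RunCwiseBinary(Functor f, std::ptrdiff_t size, T* out, const T* lhs, const T* rhs) {
  for (std::ptrdiff_t i = 0; i < size; ++i) out[i] = f(lhs[i], rhs[i]);
}

int main() {
  const std::vector<float> left = {1.f, 2.f, 3.f}, right = {10.f, 20.f, 30.f};
  std::vector<float> out(3);
  RunCwiseBinary(std::plus<float>(), 3, out.data(), left.data(), right.data());
  for (std::size_t i = 0; i < 3; ++i) assert(out[i] == left[i] + right[i]);
}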
@ -711,14 +719,14 @@ static void test_block_cwise_binary_io_zero_strides() {
right_strides[3] = 0; right_strides[3] = 0;
// Generate random data. // Generate random data.
auto* left_data = GenerateRandomData<float>(left_sizes.TotalSize()); float* left_data = GenerateRandomData<float>(left_sizes.TotalSize());
auto* right_data = GenerateRandomData<float>(right_sizes.TotalSize()); float* right_data = GenerateRandomData<float>(right_sizes.TotalSize());
DSizes<Index, 5> output_sizes = rnd_dims; DSizes<Index, 5> output_sizes = rnd_dims;
DSizes<Index, 5> output_strides(ComputeStrides<Layout, 5>(output_sizes)); DSizes<Index, 5> output_strides(ComputeStrides<Layout, 5>(output_sizes));
const auto output_total_size = output_sizes.TotalSize(); const Index output_total_size = output_sizes.TotalSize();
auto* output_data = new float[output_total_size]; float* output_data = new float[output_total_size];
BinaryFunctor functor; BinaryFunctor functor;
TensorBlockCwiseBinaryIO::Run(functor, output_sizes, output_strides, TensorBlockCwiseBinaryIO::Run(functor, output_sizes, output_strides,
@ -755,17 +763,16 @@ static void test_block_cwise_binary_io_zero_strides() {
template <int Layout> template <int Layout>
static void test_uniform_block_shape() static void test_uniform_block_shape()
{ {
using T = int; typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock; typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
{ {
// Test shape 'UniformAllDims' with uniform 'max_coeff count'. // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5; const Index max_coeff_count = 5 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
} }
@ -776,10 +783,10 @@ static void test_uniform_block_shape()
// partially into first inner-most dimension. // partially into first inner-most dimension.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5; const Index max_coeff_count = 7 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[0]); VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@ -787,10 +794,10 @@ static void test_uniform_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6; const Index max_coeff_count = 5 * 5 * 5 * 5 * 6;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(6, block.block_sizes()[4]); VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@ -802,10 +809,10 @@ static void test_uniform_block_shape()
// fully into first inner-most dimension. // fully into first inner-most dimension.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5; const Index max_coeff_count = 11 * 5 * 5 * 5 * 5;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@ -813,10 +820,10 @@ static void test_uniform_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7; const Index max_coeff_count = 5 * 5 * 5 * 5 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(5, block.block_sizes()[i]); VERIFY_IS_EQUAL(5, block.block_sizes()[i]);
@ -828,10 +835,10 @@ static void test_uniform_block_shape()
// fully into first few inner-most dimensions. // fully into first few inner-most dimensions.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(7, 5, 6, 17, 7); DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5; const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[0]); VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
VERIFY_IS_EQUAL(5, block.block_sizes()[1]); VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
VERIFY_IS_EQUAL(6, block.block_sizes()[2]); VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@ -840,10 +847,10 @@ static void test_uniform_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(7, 5, 6, 9, 7); DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7; const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(6, block.block_sizes()[3]); VERIFY_IS_EQUAL(6, block.block_sizes()[3]);
VERIFY_IS_EQUAL(5, block.block_sizes()[2]); VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@ -855,10 +862,10 @@ static void test_uniform_block_shape()
// Test shape 'UniformAllDims' with full allocation to all dims. // Test shape 'UniformAllDims' with full allocation to all dims.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(7, 5, 6, 17, 7); DSizes<Index, 5> dims(7, 5, 6, 17, 7);
const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7; const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[0]); VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
VERIFY_IS_EQUAL(5, block.block_sizes()[1]); VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
VERIFY_IS_EQUAL(6, block.block_sizes()[2]); VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@ -867,10 +874,10 @@ static void test_uniform_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(7, 5, 6, 9, 7); DSizes<Index, 5> dims(7, 5, 6, 9, 7);
const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7; const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(9, block.block_sizes()[3]); VERIFY_IS_EQUAL(9, block.block_sizes()[3]);
VERIFY_IS_EQUAL(6, block.block_sizes()[2]); VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@ -883,17 +890,16 @@ static void test_uniform_block_shape()
template <int Layout> template <int Layout>
static void test_skewed_inner_dim_block_shape() static void test_skewed_inner_dim_block_shape()
{ {
using T = int; typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock; typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
// Test shape 'SkewedInnerDims' with partial allocation to inner-most dim. // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1; const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(10, block.block_sizes()[0]); VERIFY_IS_EQUAL(10, block.block_sizes()[0]);
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(1, block.block_sizes()[i]); VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@ -901,10 +907,10 @@ static void test_skewed_inner_dim_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6; const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(6, block.block_sizes()[4]); VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(1, block.block_sizes()[i]); VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@ -915,10 +921,10 @@ static void test_skewed_inner_dim_block_shape()
// Test shape 'SkewedInnerDims' with full allocation to inner-most dim. // Test shape 'SkewedInnerDims' with full allocation to inner-most dim.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1; const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
for (int i = 1; i < 5; ++i) { for (int i = 1; i < 5; ++i) {
VERIFY_IS_EQUAL(1, block.block_sizes()[i]); VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@ -926,10 +932,10 @@ static void test_skewed_inner_dim_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7; const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
for (int i = 3; i >= 0; --i) { for (int i = 3; i >= 0; --i) {
VERIFY_IS_EQUAL(1, block.block_sizes()[i]); VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@ -941,10 +947,10 @@ static void test_skewed_inner_dim_block_shape()
// and partial allocation to second inner-dim. // and partial allocation to second inner-dim.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1; const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
VERIFY_IS_EQUAL(3, block.block_sizes()[1]); VERIFY_IS_EQUAL(3, block.block_sizes()[1]);
for (int i = 2; i < 5; ++i) { for (int i = 2; i < 5; ++i) {
@ -953,10 +959,10 @@ static void test_skewed_inner_dim_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7; const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(15, block.block_sizes()[3]); VERIFY_IS_EQUAL(15, block.block_sizes()[3]);
for (int i = 2; i >= 0; --i) { for (int i = 2; i >= 0; --i) {
@ -969,10 +975,10 @@ static void test_skewed_inner_dim_block_shape()
// and partial allocation to third inner-dim. // and partial allocation to third inner-dim.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1; const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
VERIFY_IS_EQUAL(5, block.block_sizes()[1]); VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
VERIFY_IS_EQUAL(5, block.block_sizes()[2]); VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@ -982,10 +988,10 @@ static void test_skewed_inner_dim_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7; const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(17, block.block_sizes()[3]); VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
VERIFY_IS_EQUAL(5, block.block_sizes()[2]); VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@ -998,10 +1004,10 @@ static void test_skewed_inner_dim_block_shape()
// Test shape 'SkewedInnerDims' with full allocation to all dims. // Test shape 'SkewedInnerDims' with full allocation to all dims.
if (Layout == ColMajor) { if (Layout == ColMajor) {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7; const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(11, block.block_sizes()[0]); VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
VERIFY_IS_EQUAL(5, block.block_sizes()[1]); VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
VERIFY_IS_EQUAL(6, block.block_sizes()[2]); VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@ -1010,10 +1016,10 @@ static void test_skewed_inner_dim_block_shape()
VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
} else { } else {
DSizes<Index, 5> dims(11, 5, 6, 17, 7); DSizes<Index, 5> dims(11, 5, 6, 17, 7);
const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7; const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims, TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
max_coeff_count); max_coeff_count);
TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
VERIFY_IS_EQUAL(7, block.block_sizes()[4]); VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
VERIFY_IS_EQUAL(17, block.block_sizes()[3]); VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
VERIFY_IS_EQUAL(6, block.block_sizes()[2]); VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@ -1026,15 +1032,13 @@ static void test_skewed_inner_dim_block_shape()
template <int Layout> template <int Layout>
static void test_empty_dims(const internal::TensorBlockShapeType block_shape) static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
{ {
using T = int;
// Test blocking of tensors with zero dimensions: // Test blocking of tensors with zero dimensions:
// - we must not crash on asserts and divisions by zero // - we must not crash on asserts and divisions by zero
// - we must not return block with zero dimensions // - we must not return block with zero dimensions
// (recipe for overflows/underflows, divisions by zero and NaNs later) // (recipe for overflows/underflows, divisions by zero and NaNs later)
// - total block count must be zero // - total block count must be zero
{ {
typedef internal::TensorBlockMapper<T, Index, 1, Layout> TensorBlockMapper; typedef internal::TensorBlockMapper<int, Index, 1, Layout> TensorBlockMapper;
DSizes<Index, 1> dims(0); DSizes<Index, 1> dims(0);
for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) { for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count); TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
@ -1044,7 +1048,7 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
} }
{ {
typedef internal::TensorBlockMapper<T, Index, 2, Layout> TensorBlockMapper; typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
for (int dim1 = 0; dim1 < 3; ++dim1) { for (int dim1 = 0; dim1 < 3; ++dim1) {
for (int dim2 = 0; dim2 < 3; ++dim2) { for (int dim2 = 0; dim2 < 3; ++dim2) {
DSizes<Index, 2> dims(dim1, dim2); DSizes<Index, 2> dims(dim1, dim2);
@ -1098,8 +1102,8 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides); TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
TEST_LAYOUTS(test_uniform_block_shape); TEST_LAYOUTS(test_uniform_block_shape);
TEST_LAYOUTS(test_skewed_inner_dim_block_shape); TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims); TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kUniformAllDims);
TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims); TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kSkewedInnerDims);
} }
#undef TEST_LAYOUTS #undef TEST_LAYOUTS
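The uniform and skewed tests above pin down the two block-shape policies: kUniformAllDims spreads the coefficient budget roughly evenly over all dimensions, while kSkewedInnerDims fills the inner-most dimensions first and only then grows the outer ones. A simplified model of the skewed policy for column-major layouts (hypothetical SkewedBlockSizes, not Eigen's exact algorithm), which reproduces two of the ColMajor expectations above:

#include <array>
#include <cassert>
#include <cstddef>

// Simplified "skewed inner dims" policy for a column-major tensor: give the
// inner-most dimension as much of the budget as it can use, then move outward
// with whatever multiple of full inner dimensions remains.
template <std::size_t NumDims>
std::array<std::ptrdiff_t, NumDims> SkewedBlockSizes(
    const std::array<std::ptrdiff_t, NumDims>& dims, std::ptrdiff_t max_coeff_count) {
  std::array<std::ptrdiff_t, NumDims> block;
  std::ptrdiff_t budget = max_coeff_count;
  for (std::size_t i = 0; i < NumDims; ++i) {
    block[i] = budget < dims[i] ? (budget > 0 ? budget : 1) : dims[i];
    budget /= dims[i] > 0 ? dims[i] : 1;
  }
  return block;
}

int main() {
  const std::array<std::ptrdiff_t, 5> dims{{11, 5, 6, 17, 7}};
  // A budget of 10 coefficients gives a {10, 1, 1, 1, 1} block, as in the
  // partial-allocation ColMajor case above.
  const std::array<std::ptrdiff_t, 5> b1 = SkewedBlockSizes<5>(dims, 10);
  assert(b1[0] == 10 && b1[1] == 1 && b1[2] == 1 && b1[3] == 1 && b1[4] == 1);
  // A budget of 11 * 3 fills dim 0 and spills into dim 1: {11, 3, 1, 1, 1}.
  const std::array<std::ptrdiff_t, 5> b2 = SkewedBlockSizes<5>(dims, 11 * 3);
  assert(b2[0] == 11 && b2[1] == 3 && b2[2] == 1 && b2[3] == 1 && b2[4] == 1);
}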

@ -56,7 +56,7 @@ static void test_static_dimension_failure()
// either the code should change to // either the code should change to
// Tensor<int, 2>::Dimensions{{2, 3}} // Tensor<int, 2>::Dimensions{{2, 3}}
// or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}} // or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}}
.concatenate(right.reshape(Tensor<int, 2>::Dimensions{{2, 3}}), 0); .concatenate(right.reshape(Tensor<int, 2>::Dimensions(2, 3)), 0);
} }
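The hunk above concerns how the reshape target Dimensions is built (brace-initialized list versus the two-argument constructor). A small usage sketch of the reshape-then-concatenate pattern with the two-argument constructor, assuming the unsupported CXX11 Tensor module header:

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<int, 3> left(2, 3, 1), right(2, 3, 1);
  left.setConstant(1);
  right.setConstant(2);
  // Reshape both rank-3 operands to rank 2, then concatenate along axis 0;
  // two 2x3 tensors stacked along dimension 0 give a 4x3 result.
  Eigen::Tensor<int, 2> result =
      left.reshape(Eigen::Tensor<int, 2>::Dimensions(2, 3))
          .concatenate(right.reshape(Eigen::Tensor<int, 2>::Dimensions(2, 3)), 0);
  assert(result.dimension(0) == 4 && result.dimension(1) == 3);
}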
template<int DataLayout> template<int DataLayout>

@ -514,7 +514,7 @@ static void test_const_inputs()
struct SqrtOutputKernel { struct SqrtOutputKernel {
template <typename Index, typename Scalar> template <typename Index, typename Scalar>
EIGEN_ALWAYS_INLINE void operator()( EIGEN_ALWAYS_INLINE void operator()(
const OutputKernel::OutputMapper<Index, Scalar>& output_mapper, const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
const TensorContractionParams&, Index, Index, Index num_rows, const TensorContractionParams&, Index, Index, Index num_rows,
Index num_cols) const { Index num_cols) const {
for (int i = 0; i < num_rows; ++i) { for (int i = 0; i < num_rows; ++i) {
@ -553,7 +553,7 @@ static void test_large_contraction_with_output_kernel() {
m_result = m_left * m_right; m_result = m_left * m_right;
for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { for (std::ptrdiff_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
VERIFY(&t_result.data()[i] != &m_result.data()[i]); VERIFY(&t_result.data()[i] != &m_result.data()[i]);
VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i])); VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
} }
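The output kernel above is invoked by the contraction evaluator once per block of the result, with a blas_data_mapper giving element access into that block. A hedged sketch of a kernel with the same signature that scales its block in place, plugged in through the contract() overload these tests exercise (the mapper's operator()(i, j) access is assumed from the SqrtOutputKernel body, which this excerpt only shows in part):

#include <unsupported/Eigen/CXX11/Tensor>

// Same call signature as SqrtOutputKernel above; here the kernel simply
// doubles every coefficient of the output block it is handed.
struct ScaleOutputKernel {
  template <typename Index, typename Scalar>
  EIGEN_ALWAYS_INLINE void operator()(
      const Eigen::internal::blas_data_mapper<Scalar, Index, Eigen::ColMajor>& output_mapper,
      const Eigen::TensorContractionParams&, Index, Index,
      Index num_rows, Index num_cols) const {
    for (Index j = 0; j < num_cols; ++j)
      for (Index i = 0; i < num_rows; ++i)
        output_mapper(i, j) *= Scalar(2);
  }
};

int main() {
  Eigen::Tensor<float, 2> lhs(4, 3), rhs(3, 5);
  lhs.setRandom();
  rhs.setRandom();
  const Eigen::array<Eigen::IndexPair<Eigen::Index>, 1> dims = {
      Eigen::IndexPair<Eigen::Index>(1, 0)};
  // The third argument hands the kernel to the contraction evaluator.
  const Eigen::Tensor<float, 2> out = lhs.contract(rhs, dims, ScaleOutputKernel());
  return out.dimension(0) == 4 && out.dimension(1) == 5 ? 0 : 1;
}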

@ -170,7 +170,6 @@ static void test_type2indexpair_list()
typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b; typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b;
typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c; typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c;
Dims0 d0;
Dims2_a d2_a; Dims2_a d2_a;
Dims2_b d2_b; Dims2_b d2_b;

@ -255,7 +255,7 @@ void test_multithread_contraction_agrees_with_singlethread() {
struct SqrtOutputKernel { struct SqrtOutputKernel {
template <typename Index, typename Scalar> template <typename Index, typename Scalar>
EIGEN_ALWAYS_INLINE void operator()( EIGEN_ALWAYS_INLINE void operator()(
const OutputKernel::OutputMapper<Index, Scalar>& output_mapper, const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
const TensorContractionParams&, Index, Index, Index num_rows, const TensorContractionParams&, Index, Index, Index num_rows,
Index num_cols) const { Index num_cols) const {
for (int i = 0; i < num_rows; ++i) { for (int i = 0; i < num_rows; ++i) {
@ -300,7 +300,7 @@ static void test_multithread_contraction_with_output_kernel() {
m_result = m_left * m_right; m_result = m_left * m_right;
for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
VERIFY(&t_result.data()[i] != &m_result.data()[i]); VERIFY(&t_result.data()[i] != &m_result.data()[i]);
VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i])); VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
} }
@ -428,7 +428,7 @@ void test_threadpool_allocate(TestAllocator* allocator)
void* ptr = device.allocate(512); void* ptr = device.allocate(512);
device.deallocate(ptr); device.deallocate(ptr);
} }
VERIFY(allocator != nullptr); VERIFY(allocator != NULL);
VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs); VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs);
VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs); VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs);
} }
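test_threadpool_allocate drives the device's allocate and deallocate through a counting TestAllocator and then checks that the two counters balance. A hypothetical stand-alone counting allocator in the same spirit (plain C++, not Eigen's Allocator interface):

#include <cassert>
#include <cstdlib>

// Wraps malloc/free and records how many allocations and deallocations were
// made, so a test can verify that they balance.
class CountingAllocator {
 public:
  void* allocate(std::size_t num_bytes) {
    ++allocs_;
    return std::malloc(num_bytes);
  }
  void deallocate(void* buffer) {
    ++deallocs_;
    std::free(buffer);
  }
  int alloc_count() const { return allocs_; }
  int dealloc_count() const { return deallocs_; }

 private:
  int allocs_ = 0;
  int deallocs_ = 0;
};

int main() {
  CountingAllocator allocator;
  const int num_allocs = 20;
  for (int i = 0; i < num_allocs; ++i) {
    void* ptr = allocator.allocate(512);
    allocator.deallocate(ptr);
  }
  assert(allocator.alloc_count() == num_allocs);
  assert(allocator.dealloc_count() == num_allocs);
}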
@ -460,7 +460,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool)
CALL_SUBTEST_6(test_multithread_random()); CALL_SUBTEST_6(test_multithread_random());
TestAllocator test_allocator; TestAllocator test_allocator;
CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>(nullptr)); CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>(NULL));
CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>(&test_allocator)); CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>(&test_allocator));
CALL_SUBTEST_6(test_threadpool_allocate(&test_allocator)); CALL_SUBTEST_6(test_threadpool_allocate(&test_allocator));
} }

@ -9,6 +9,7 @@
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifdef EIGEN_TEST_PART_1 #ifdef EIGEN_TEST_PART_1
#include "sparse.h" #include "sparse.h"
@ -236,7 +237,6 @@ EIGEN_DECLARE_TEST(kronecker_product)
#ifdef EIGEN_TEST_PART_2 #ifdef EIGEN_TEST_PART_2
// simply check that for a dense kronecker product, sparse module is not needed // simply check that for a dense kronecker product, sparse module is not needed
#include "main.h" #include "main.h"
#include <Eigen/KroneckerProduct> #include <Eigen/KroneckerProduct>

@ -23,9 +23,8 @@ inline bool test_isApprox_abs(const Type1& a, const Type2& b)
// Returns a matrix with eigenvalues clustered around 0, 1 and 2. // Returns a matrix with eigenvalues clustered around 0, 1 and 2.
template<typename MatrixType> template<typename MatrixType>
MatrixType randomMatrixWithRealEivals(const typename MatrixType::Index size) MatrixType randomMatrixWithRealEivals(const Index size)
{ {
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::RealScalar RealScalar;
MatrixType diag = MatrixType::Zero(size, size); MatrixType diag = MatrixType::Zero(size, size);
@ -42,16 +41,15 @@ template <typename MatrixType, int IsComplex = NumTraits<typename internal::trai
struct randomMatrixWithImagEivals struct randomMatrixWithImagEivals
{ {
// Returns a matrix with eigenvalues clustered around 0 and +/- i. // Returns a matrix with eigenvalues clustered around 0 and +/- i.
static MatrixType run(const typename MatrixType::Index size); static MatrixType run(const Index size);
}; };
// Partial specialization for real matrices // Partial specialization for real matrices
template<typename MatrixType> template<typename MatrixType>
struct randomMatrixWithImagEivals<MatrixType, 0> struct randomMatrixWithImagEivals<MatrixType, 0>
{ {
static MatrixType run(const typename MatrixType::Index size) static MatrixType run(const Index size)
{ {
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
MatrixType diag = MatrixType::Zero(size, size); MatrixType diag = MatrixType::Zero(size, size);
Index i = 0; Index i = 0;
@ -77,9 +75,8 @@ struct randomMatrixWithImagEivals<MatrixType, 0>
template<typename MatrixType> template<typename MatrixType>
struct randomMatrixWithImagEivals<MatrixType, 1> struct randomMatrixWithImagEivals<MatrixType, 1>
{ {
static MatrixType run(const typename MatrixType::Index size) static MatrixType run(const Index size)
{ {
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::RealScalar RealScalar;
const Scalar imagUnit(0, 1); const Scalar imagUnit(0, 1);
@ -171,7 +168,6 @@ void testMatrixType(const MatrixType& m)
{ {
// Matrices with clustered eigenvalue lead to different code paths // Matrices with clustered eigenvalue lead to different code paths
// in MatrixFunction.h and are thus useful for testing. // in MatrixFunction.h and are thus useful for testing.
typedef typename MatrixType::Index Index;
const Index size = m.rows(); const Index size = m.rows();
for (int i = 0; i < g_repeat; i++) { for (int i = 0; i < g_repeat; i++) {

@ -318,10 +318,6 @@ EIGEN_DECLARE_TEST(openglsupport)
GLint prg_id = createShader(vtx,frg); GLint prg_id = createShader(vtx,frg);
typedef Vector2d Vector2d;
typedef Vector3d Vector3d;
typedef Vector4d Vector4d;
VERIFY_UNIFORM(dv,v2d, Vector2d); VERIFY_UNIFORM(dv,v2d, Vector2d);
VERIFY_UNIFORM(dv,v3d, Vector3d); VERIFY_UNIFORM(dv,v3d, Vector3d);
VERIFY_UNIFORM(dv,v4d, Vector4d); VERIFY_UNIFORM(dv,v4d, Vector4d);

@ -30,7 +30,6 @@ struct increment_if_fixed_size
template<int Deg, typename POLYNOMIAL, typename SOLVER> template<int Deg, typename POLYNOMIAL, typename SOLVER>
bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve ) bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
{ {
typedef typename POLYNOMIAL::Index Index;
typedef typename POLYNOMIAL::Scalar Scalar; typedef typename POLYNOMIAL::Scalar Scalar;
typedef typename POLYNOMIAL::RealScalar RealScalar; typedef typename POLYNOMIAL::RealScalar RealScalar;