This commit is contained in:
Gael Guennebaud 2018-10-08 17:35:18 +02:00
commit 649d4758a6
23 changed files with 473 additions and 345 deletions

View File

@ -420,7 +420,7 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
if(size != m_rows*m_cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols);
if (size)
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;
@ -497,7 +497,7 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
if(size != _Rows*m_cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols);
if (size)
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;
@ -573,7 +573,7 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
if(size != m_rows*_Cols)
{
internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows);
if (size)
if (size>0) // >0 and not simply !=0 to let the compiler know that size cannot be negative
m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
else
m_data = 0;

View File

@ -1217,7 +1217,8 @@ inline int log2(int x)
/** \returns the square root of \a x.
*
* It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
* It is essentially equivalent to
* \code using std::sqrt; return sqrt(x); \endcode
* but slightly faster for float/double and some compilers (e.g., gcc), thanks to
* specializations when SSE is enabled.
*

View File

@ -28,7 +28,7 @@ namespace internal {
#endif
#endif
#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
// With GCC's default ABI version, __m128 and __m256 are the same type and therefore we cannot
// have overloads for both types without a linking error.
// One solution is to increase ABI version using -fabi-version=4 (or greater).

View File

@ -379,10 +379,12 @@
#include <cuda_fp16.h>
#endif
#if defined(EIGEN_HIP_DEVICE_COMPILE)
#if defined(EIGEN_HIPCC)
#define EIGEN_VECTORIZE_GPU
#include <hip/hip_vector_types.h>
#endif
#if defined(EIGEN_HIP_DEVICE_COMPILE)
#define EIGEN_HAS_HIP_FP16
#include <hip/hip_fp16.h>

View File

@ -55,7 +55,9 @@ public:
operator int() const { return value; }
FixedInt() {}
FixedInt( VariableAndFixedInt<N> other) {
EIGEN_ONLY_USED_FOR_DEBUG(other);
#ifndef EIGEN_INTERNAL_DEBUGGING
EIGEN_UNUSED_VARIABLE(other);
#endif
eigen_internal_assert(int(other)==N);
}

View File

@ -96,10 +96,16 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be aligned on \a alignment bytes
* (EIGEN_DEFAULT_ALIGN_BYTES by default). Fast, but wastes \a alignment additional bytes of memory.
* Does not throw any exception.
*/
inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
eigen_assert(alignment >= sizeof(void*) && (alignment & -alignment) == alignment && "Alignment must be at least sizeof(void*) and a power of 2");
#if defined(EIGEN_HIP_DEVICE_COMPILE)
void *original = ::malloc(size+alignment);
#else
void *original = std::malloc(size+alignment);
#endif
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
*(reinterpret_cast<void**>(aligned) - 1) = original;
@ -107,9 +113,15 @@ inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = E
}
/** \internal Frees memory allocated with handmade_aligned_malloc */
inline void handmade_aligned_free(void *ptr)
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
if (ptr) {
#if defined(EIGEN_HIP_DEVICE_COMPILE)
::free(*(reinterpret_cast<void**>(ptr) - 1));
#else
std::free(*(reinterpret_cast<void**>(ptr) - 1));
#endif
}
}
/** \internal
@ -872,6 +884,15 @@ public:
~aligned_allocator() {}
#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
// In gcc, std::allocator::max_size() is bugged, which makes gcc trigger a warning:
// eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
size_type max_size() const {
  // Upper bound on the element count: the largest std::ptrdiff_t value
  // divided by the size of one element.
  const std::ptrdiff_t max_bytes = (std::numeric_limits<std::ptrdiff_t>::max)();
  return max_bytes / sizeof(T);
}
#endif
pointer allocate(size_type num, const void* /*hint*/ = 0)
{
internal::check_size_for_overflow<T>(num);

View File

@ -105,7 +105,7 @@ EIGEN_DEVICE_FUNC
inline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,
internal::get_compiletime_reshape_size<NRowsType,NColsType,SizeAtCompileTime>::value,
internal::get_compiletime_reshape_size<NColsType,NRowsType,SizeAtCompileTime>::value,
Order==AutoOrder?Flags&RowMajorBit:Order>
(Order==AutoOrder?Flags&RowMajorBit:Order)>
reshaped(NRowsType nRows, NColsType nCols) EIGEN_RESHAPED_METHOD_CONST
{
return Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived,
@ -128,7 +128,7 @@ reshaped() EIGEN_RESHAPED_METHOD_CONST
template<int Order>
EIGEN_DEVICE_FUNC
inline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived, SizeAtCompileTime, 1, Order==AutoOrder?Flags&RowMajorBit:Order>
inline Reshaped<EIGEN_RESHAPED_METHOD_CONST Derived, SizeAtCompileTime, 1, (Order==AutoOrder?Flags&RowMajorBit:Order)>
reshaped() EIGEN_RESHAPED_METHOD_CONST
{
EIGEN_STATIC_ASSERT(Order==RowMajor || Order==ColMajor || Order==AutoOrder, INVALID_TEMPLATE_PARAMETER);

View File

@ -15,6 +15,14 @@
#ifdef EIGEN_TEST_PART_3
// Make sure we also check c++98 max implementation
#define EIGEN_MAX_CPP_VER 03
// We need to disable this warning when compiling with c++11 while limiting Eigen to c++98
// Ideally we would rather configure the compiler to build in c++98 mode but this needs
// to be done at the CMakeLists.txt level.
#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
#pragma GCC diagnostic ignored "-Wdeprecated"
#endif
#endif
#include <valarray>

View File

@ -255,8 +255,8 @@ void test_ref_overloads()
void test_ref_fixed_size_assert()
{
Vector4f v4;
VectorXf vx(10);
Vector4f v4 = Vector4f::Random();
VectorXf vx = VectorXf::Random(10);
VERIFY_RAISES_STATIC_ASSERT( Ref<Vector3f> y = v4; (void)y; );
VERIFY_RAISES_STATIC_ASSERT( Ref<Vector3f> y = vx.head<4>(); (void)y; );
VERIFY_RAISES_STATIC_ASSERT( Ref<const Vector3f> y = v4; (void)y; );

View File

@ -18,7 +18,7 @@ void check_stddeque_matrix(const MatrixType& m)
Index rows = m.rows();
Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::deque<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType(rows,cols)), w(20, y);
std::deque<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType::Zero(rows,cols)), w(20, y);
v.front() = x;
w.front() = w.back();
VERIFY_IS_APPROX(w.front(), w.back());
@ -33,7 +33,7 @@ void check_stddeque_matrix(const MatrixType& m)
++wi;
}
v.resize(21);
v.resize(21,MatrixType::Zero(rows,cols));
v.back() = x;
VERIFY_IS_APPROX(v.back(), x);
v.resize(22,y);
@ -46,8 +46,8 @@ template<typename TransformType>
void check_stddeque_transform(const TransformType&)
{
typedef typename TransformType::MatrixType MatrixType;
TransformType x(MatrixType::Random()), y(MatrixType::Random());
std::deque<TransformType,Eigen::aligned_allocator<TransformType> > v(10), w(20, y);
TransformType x(MatrixType::Random()), y(MatrixType::Random()), ti=TransformType::Identity();
std::deque<TransformType,Eigen::aligned_allocator<TransformType> > v(10,ti), w(20, y);
v.front() = x;
w.front() = w.back();
VERIFY_IS_APPROX(w.front(), w.back());
@ -62,7 +62,7 @@ void check_stddeque_transform(const TransformType&)
++wi;
}
v.resize(21);
v.resize(21,ti);
v.back() = x;
VERIFY_IS_APPROX(v.back(), x);
v.resize(22,y);
@ -75,8 +75,8 @@ template<typename QuaternionType>
void check_stddeque_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::deque<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10), w(20, y);
QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
std::deque<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10,qi), w(20, y);
v.front() = x;
w.front() = w.back();
VERIFY_IS_APPROX(w.front(), w.back());
@ -91,7 +91,7 @@ void check_stddeque_quaternion(const QuaternionType&)
++wi;
}
v.resize(21);
v.resize(21,qi);
v.back() = x;
VERIFY_IS_APPROX(v.back(), x);
v.resize(22,y);

View File

@ -31,7 +31,7 @@ void check_stddeque_matrix(const MatrixType& m)
Index rows = m.rows();
Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::deque<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
std::deque<MatrixType> v(10, MatrixType::Zero(rows,cols)), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
@ -64,8 +64,8 @@ template<typename TransformType>
void check_stddeque_transform(const TransformType&)
{
typedef typename TransformType::MatrixType MatrixType;
TransformType x(MatrixType::Random()), y(MatrixType::Random());
std::deque<TransformType> v(10), w(20, y);
TransformType x(MatrixType::Random()), y(MatrixType::Random()), ti=TransformType::Identity();
std::deque<TransformType> v(10,ti), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
@ -75,7 +75,7 @@ void check_stddeque_transform(const TransformType&)
VERIFY_IS_APPROX(w[i], v[i]);
}
v.resize(21);
v.resize(21,ti);
v[20] = x;
VERIFY_IS_APPROX(v[20], x);
v.resize(22,y);
@ -98,8 +98,8 @@ template<typename QuaternionType>
void check_stddeque_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::deque<QuaternionType> v(10), w(20, y);
QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
std::deque<QuaternionType> v(10,qi), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
@ -109,7 +109,7 @@ void check_stddeque_quaternion(const QuaternionType&)
VERIFY_IS_APPROX(w[i], v[i]);
}
v.resize(21);
v.resize(21,qi);
v[20] = x;
VERIFY_IS_APPROX(v[20], x);
v.resize(22,y);

View File

@ -18,7 +18,7 @@ void check_stdlist_matrix(const MatrixType& m)
Index rows = m.rows();
Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::list<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType(rows,cols)), w(20, y);
std::list<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType::Zero(rows,cols)), w(20, y);
v.front() = x;
w.front() = w.back();
VERIFY_IS_APPROX(w.front(), w.back());
@ -33,7 +33,7 @@ void check_stdlist_matrix(const MatrixType& m)
++wi;
}
v.resize(21);
v.resize(21, MatrixType::Zero(rows,cols));
v.back() = x;
VERIFY_IS_APPROX(v.back(), x);
v.resize(22,y);
@ -46,8 +46,8 @@ template<typename TransformType>
void check_stdlist_transform(const TransformType&)
{
typedef typename TransformType::MatrixType MatrixType;
TransformType x(MatrixType::Random()), y(MatrixType::Random());
std::list<TransformType,Eigen::aligned_allocator<TransformType> > v(10), w(20, y);
TransformType x(MatrixType::Random()), y(MatrixType::Random()), ti=TransformType::Identity();
std::list<TransformType,Eigen::aligned_allocator<TransformType> > v(10,ti), w(20, y);
v.front() = x;
w.front() = w.back();
VERIFY_IS_APPROX(w.front(), w.back());
@ -62,7 +62,7 @@ void check_stdlist_transform(const TransformType&)
++wi;
}
v.resize(21);
v.resize(21, ti);
v.back() = x;
VERIFY_IS_APPROX(v.back(), x);
v.resize(22,y);
@ -75,8 +75,8 @@ template<typename QuaternionType>
void check_stdlist_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::list<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10), w(20, y);
QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
std::list<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10,qi), w(20, y);
v.front() = x;
w.front() = w.back();
VERIFY_IS_APPROX(w.front(), w.back());
@ -91,7 +91,7 @@ void check_stdlist_quaternion(const QuaternionType&)
++wi;
}
v.resize(21);
v.resize(21,qi);
v.back() = x;
VERIFY_IS_APPROX(v.back(), x);
v.resize(22,y);

View File

@ -47,7 +47,7 @@ void check_stdlist_matrix(const MatrixType& m)
Index rows = m.rows();
Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::list<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
std::list<MatrixType> v(10, MatrixType::Zero(rows,cols)), w(20, y);
typename std::list<MatrixType>::iterator itv = get(v, 5);
typename std::list<MatrixType>::iterator itw = get(w, 6);
*itv = x;
@ -86,8 +86,8 @@ template<typename TransformType>
void check_stdlist_transform(const TransformType&)
{
typedef typename TransformType::MatrixType MatrixType;
TransformType x(MatrixType::Random()), y(MatrixType::Random());
std::list<TransformType> v(10), w(20, y);
TransformType x(MatrixType::Random()), y(MatrixType::Random()), ti=TransformType::Identity();
std::list<TransformType> v(10,ti), w(20, y);
typename std::list<TransformType>::iterator itv = get(v, 5);
typename std::list<TransformType>::iterator itw = get(w, 6);
*itv = x;
@ -103,7 +103,7 @@ void check_stdlist_transform(const TransformType&)
++itw;
}
v.resize(21);
v.resize(21, ti);
set(v, 20, x);
VERIFY_IS_APPROX(*get(v, 20), x);
v.resize(22,y);
@ -126,8 +126,8 @@ template<typename QuaternionType>
void check_stdlist_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::list<QuaternionType> v(10), w(20, y);
QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
std::list<QuaternionType> v(10,qi), w(20, y);
typename std::list<QuaternionType>::iterator itv = get(v, 5);
typename std::list<QuaternionType>::iterator itw = get(w, 6);
*itv = x;
@ -143,7 +143,7 @@ void check_stdlist_quaternion(const QuaternionType&)
++itw;
}
v.resize(21);
v.resize(21,qi);
set(v, 20, x);
VERIFY_IS_APPROX(*get(v, 20), x);
v.resize(22,y);

View File

@ -17,7 +17,7 @@ void check_stdvector_matrix(const MatrixType& m)
Index rows = m.rows();
Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::vector<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType(rows,cols)), w(20, y);
std::vector<MatrixType,Eigen::aligned_allocator<MatrixType> > v(10, MatrixType::Zero(rows,cols)), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
@ -86,8 +86,8 @@ template<typename QuaternionType>
void check_stdvector_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::vector<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10), w(20, y);
QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
std::vector<QuaternionType,Eigen::aligned_allocator<QuaternionType> > v(10,qi), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
@ -117,6 +117,16 @@ void check_stdvector_quaternion(const QuaternionType&)
}
}
// the code below triggered an invalid warning with gcc >= 7
// eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
// This has been reported to gcc here: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
void std_vector_gcc_warning()
{
typedef Eigen::Vector3f T;
std::vector<T, Eigen::aligned_allocator<T> > v;
v.push_back(T());
}
EIGEN_DECLARE_TEST(stdvector)
{
// some non vectorizable fixed sizes

View File

@ -31,7 +31,7 @@ void check_stdvector_matrix(const MatrixType& m)
Index rows = m.rows();
Index cols = m.cols();
MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols);
std::vector<MatrixType> v(10, MatrixType(rows,cols)), w(20, y);
std::vector<MatrixType> v(10, MatrixType::Zero(rows,cols)), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);
@ -100,8 +100,8 @@ template<typename QuaternionType>
void check_stdvector_quaternion(const QuaternionType&)
{
typedef typename QuaternionType::Coefficients Coefficients;
QuaternionType x(Coefficients::Random()), y(Coefficients::Random());
std::vector<QuaternionType> v(10), w(20, y);
QuaternionType x(Coefficients::Random()), y(Coefficients::Random()), qi=QuaternionType::Identity();
std::vector<QuaternionType> v(10,qi), w(20, y);
v[5] = x;
w[6] = v[5];
VERIFY_IS_APPROX(w[6], v[5]);

View File

@ -186,21 +186,21 @@ struct TensorContractionKernel {
/*ConjugateLhs*/ false, /*ConjugateRhs*/ false>
GebpKernel;
EIGEN_DONT_INLINE
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
static void packLhs(LhsScalar* lhsBlock,
const typename LhsMapper::SubMapper& data_mapper,
const StorageIndex depth, const StorageIndex rows) {
LhsPacker()(lhsBlock, data_mapper, depth, rows, /*stride*/ 0, /*offset*/ 0);
}
EIGEN_DONT_INLINE
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
static void packRhs(RhsScalar* rhsBlock,
const typename RhsMapper::SubMapper& data_mapper,
const StorageIndex depth, const StorageIndex cols) {
RhsPacker()(rhsBlock, data_mapper, depth, cols);
}
EIGEN_DONT_INLINE
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
static void invoke(const OutputMapper& output_mapper,
const LhsScalar* lhsBlock, const RhsScalar* rhsBlock,
const StorageIndex rows, const StorageIndex depth,
@ -667,8 +667,8 @@ struct TensorContractionEvaluatorBase
this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));
this->template evalGemmPartial<lhs_inner_dim_contiguous,
rhs_inner_dim_contiguous,
rhs_inner_dim_reordered, Alignment>(buffer,
0, k, 1);
rhs_inner_dim_reordered,
Alignment, true>(buffer, 0, k, 1);
}
template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,
@ -681,7 +681,7 @@ struct TensorContractionEvaluatorBase
num_threads);
}
template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment, bool use_output_kernel = true>
template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment, bool use_output_kernel>
EIGEN_DEVICE_FUNC void evalGemmPartial(Scalar* buffer, Index k_start, Index k_end, int num_threads) const {
eigen_assert(k_end >= k_start && k_start >= 0 && k_end <= this->m_k_size);
// columns in slice on left side, rows on right side

View File

@ -794,7 +794,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
Index num_blocks = divup<Index>(k, block_size);
// we use 'result' for the first block's partial result.
MaxSizeVector<Scalar*> block_buffers(num_blocks - 1);
Barrier barrier(num_blocks);
Barrier barrier(internal::convert_index<int>(num_blocks));
auto process_block = [=, &barrier](Scalar* buf, Index begin, Index end) {
::memset(buf, 0, m * n * sizeof(Scalar));
TENSOR_CONTRACTION_DISPATCH(

View File

@ -195,6 +195,14 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
m_impl.getResourceRequirements(resources);
}
// required in block(OutputTensorBlock* output_block) const
// For C++03 compatibility this must be defined outside the method
struct BlockIteratorState {
// Plain aggregate of four Index values, declared at class scope so that it
// can be used as the element type of the `block_iter_state` array in block().
// NOTE(review): presumably one entry per output dimension; the exact meaning
// of each field is set by block() — confirm against its body.
Index stride;
Index span;
Index size;
Index count;
};
// TODO(andydavis) Reduce the overhead of this function.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
OutputTensorBlock* output_block) const {
@ -219,12 +227,6 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
}
// Initialize output block iterator state.
struct BlockIteratorState {
Index stride;
Index span;
Index size;
Index count;
};
array<BlockIteratorState, NumOutputDims> block_iter_state;
for (Index i = 0; i < NumOutputDims; ++i) {

View File

@ -218,6 +218,7 @@ struct InnerMostDimReducer<Self, Op, false, true> {
}
};
#if !defined(EIGEN_HIPCC)
template <typename Self, typename Op>
struct InnerMostDimReducer<Self, Op, true, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
@ -257,7 +258,8 @@ struct InnerMostDimReducer<Self, Op, true, true> {
}
}
};
#endif
template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
struct InnerMostDimPreserver {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {

View File

@ -292,7 +292,7 @@ __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input,
}
template <typename Op>
__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) {
__global__ void ReductionCleanupKernelHalfFloat(Op reducer, half* output, half2* scratch) {
eigen_assert(threadIdx.x == 1);
half tmp = __low2half(*scratch);
reducer.reduce(__high2half(*scratch), &tmp);

View File

@ -124,7 +124,11 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
m_stride = m_stride * dims[i];
}
} else {
for (int i = NumDims - 1; i > op.axis(); --i) {
// dims can only be indexed through unsigned integers,
// so let's use an unsigned type to let the compiler know.
// This prevents spurious warnings such as: "'*((void*)(& evaluator)+64)[18446744073709551615]' may be used uninitialized in this function"
unsigned int axis = internal::convert_index<unsigned int>(op.axis());
for (unsigned int i = NumDims - 1; i > axis; --i) {
m_stride = m_stride * dims[i];
}
}

View File

@ -225,11 +225,11 @@ static void test_simple_reductions() {
Tensor<int, 1> ints(10);
std::iota(ints.data(), ints.data() + ints.dimension(0), 0);
TensorFixedSize<bool, Sizes<> > all;
all = ints.all();
VERIFY(!all());
all = (ints >= ints.constant(0)).all();
VERIFY(all());
TensorFixedSize<bool, Sizes<> > all_;
all_ = ints.all();
VERIFY(!all_());
all_ = (ints >= ints.constant(0)).all();
VERIFY(all_());
TensorFixedSize<bool, Sizes<> > any;
any = (ints > ints.constant(10)).any();

File diff suppressed because it is too large Load Diff