mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-24 02:29:33 +08:00
merge
This commit is contained in:
commit
8f2bdde373
@ -359,6 +359,7 @@ using std::ptrdiff_t;
|
|||||||
#include "src/Core/arch/ZVector/Complex.h"
|
#include "src/Core/arch/ZVector/Complex.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "src/Core/arch/CUDA/Complex.h"
|
||||||
// Half float support
|
// Half float support
|
||||||
#include "src/Core/arch/CUDA/Half.h"
|
#include "src/Core/arch/CUDA/Half.h"
|
||||||
#include "src/Core/arch/CUDA/PacketMathHalf.h"
|
#include "src/Core/arch/CUDA/PacketMathHalf.h"
|
||||||
|
@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
|
|||||||
*
|
*
|
||||||
* The function generates 'size' equally spaced values in the closed interval [low,high].
|
* The function generates 'size' equally spaced values in the closed interval [low,high].
|
||||||
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
|
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
|
||||||
* assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization
|
* assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
|
||||||
* and yields faster code than the random access version.
|
* and yields faster code than the random access version.
|
||||||
*
|
*
|
||||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||||
@ -389,7 +389,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
|
|||||||
/**
|
/**
|
||||||
* \brief Sets a linearly spaced vector.
|
* \brief Sets a linearly spaced vector.
|
||||||
*
|
*
|
||||||
* The function fill *this with equally spaced values in the closed interval [low,high].
|
* The function fills *this with equally spaced values in the closed interval [low,high].
|
||||||
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
* When size is set to 1, a vector of length 1 containing 'high' is returned.
|
||||||
*
|
*
|
||||||
* \only_for_vectors
|
* \only_for_vectors
|
||||||
|
@ -159,20 +159,20 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
|
|||||||
template<typename Scalar,int Size,int MaxSize>
|
template<typename Scalar,int Size,int MaxSize>
|
||||||
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
|
||||||
{
|
{
|
||||||
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
|
|
||||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
|
|
||||||
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
|
||||||
#else
|
|
||||||
// Some architectures cannot align on the stack,
|
|
||||||
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
|
||||||
enum {
|
enum {
|
||||||
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
|
ForceAlignment = internal::packet_traits<Scalar>::Vectorizable,
|
||||||
PacketSize = internal::packet_traits<Scalar>::size
|
PacketSize = internal::packet_traits<Scalar>::size
|
||||||
};
|
};
|
||||||
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
|
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
|
||||||
|
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
|
||||||
|
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
|
||||||
|
#else
|
||||||
|
// Some architectures cannot align on the stack,
|
||||||
|
// => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
|
||||||
|
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
|
||||||
EIGEN_STRONG_INLINE Scalar* data() {
|
EIGEN_STRONG_INLINE Scalar* data() {
|
||||||
return ForceAlignment
|
return ForceAlignment
|
||||||
? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
|
? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
|
||||||
: m_data.array;
|
: m_data.array;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -207,7 +207,7 @@ template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
|
|||||||
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
||||||
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
||||||
|
|
||||||
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
|
typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
|
||||||
|
|
||||||
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
|
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
|
||||||
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
|
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
|
||||||
|
@ -29,8 +29,12 @@ T generic_fast_tanh_float(const T& a_x)
|
|||||||
// this range is +/-1.0f in single-precision.
|
// this range is +/-1.0f in single-precision.
|
||||||
const T plus_9 = pset1<T>(9.f);
|
const T plus_9 = pset1<T>(9.f);
|
||||||
const T minus_9 = pset1<T>(-9.f);
|
const T minus_9 = pset1<T>(-9.f);
|
||||||
const T x = pmax(minus_9, pmin(plus_9, a_x));
|
// NOTE GCC prior to 6.3 might improperly optimize this max/min
|
||||||
|
// step such that if a_x is nan, x will be either 9 or -9,
|
||||||
|
// and tanh will return 1 or -1 instead of nan.
|
||||||
|
// This is supposed to be fixed in gcc6.3,
|
||||||
|
// see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
|
||||||
|
const T x = pmax(minus_9,pmin(plus_9,a_x));
|
||||||
// The monomial coefficients of the numerator polynomial (odd).
|
// The monomial coefficients of the numerator polynomial (odd).
|
||||||
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
||||||
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
||||||
|
@ -330,15 +330,11 @@ template<typename Derived> class MatrixBase
|
|||||||
|
|
||||||
/////////// LU module ///////////
|
/////////// LU module ///////////
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
|
||||||
inline const FullPivLU<PlainObject> fullPivLu() const;
|
inline const FullPivLU<PlainObject> fullPivLu() const;
|
||||||
EIGEN_DEVICE_FUNC
|
|
||||||
inline const PartialPivLU<PlainObject> partialPivLu() const;
|
inline const PartialPivLU<PlainObject> partialPivLu() const;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
|
||||||
inline const PartialPivLU<PlainObject> lu() const;
|
inline const PartialPivLU<PlainObject> lu() const;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
|
||||||
inline const Inverse<Derived> inverse() const;
|
inline const Inverse<Derived> inverse() const;
|
||||||
|
|
||||||
template<typename ResultType>
|
template<typename ResultType>
|
||||||
|
88
Eigen/src/Core/arch/CUDA/Complex.h
Normal file
88
Eigen/src/Core/arch/CUDA/Complex.h
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||||
|
//
|
||||||
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#ifndef EIGEN_COMPLEX_CUDA_H
|
||||||
|
#define EIGEN_COMPLEX_CUDA_H
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
||||||
|
|
||||||
|
// Many std::complex methods such as operator+, operator-, operator* and
|
||||||
|
// operator/ are not constexpr. Due to this, clang does not treat them as device
|
||||||
|
// functions and thus Eigen functors making use of these operators fail to
|
||||||
|
// compile. Here, we manually specialize these functors for complex types when
|
||||||
|
// building for CUDA to avoid non-constexpr methods.
|
||||||
|
|
||||||
|
template<typename T> struct scalar_sum_op<std::complex<T>> {
|
||||||
|
typedef typename std::complex<T> result_type;
|
||||||
|
|
||||||
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||||
|
return std::complex<T>(numext::real(a) + numext::real(b),
|
||||||
|
numext::imag(a) + numext::imag(b));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T> struct scalar_difference_op<std::complex<T>> {
|
||||||
|
typedef typename std::complex<T> result_type;
|
||||||
|
|
||||||
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||||
|
return std::complex<T>(numext::real(a) - numext::real(b),
|
||||||
|
numext::imag(a) - numext::imag(b));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T> struct scalar_product_op<std::complex<T>, std::complex<T>> {
|
||||||
|
enum {
|
||||||
|
Vectorizable = packet_traits<std::complex<T>>::HasMul
|
||||||
|
};
|
||||||
|
typedef typename std::complex<T> result_type;
|
||||||
|
|
||||||
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||||
|
const T a_real = numext::real(a);
|
||||||
|
const T a_imag = numext::imag(a);
|
||||||
|
const T b_real = numext::real(b);
|
||||||
|
const T b_imag = numext::imag(b);
|
||||||
|
return std::complex<T>(a_real * b_real - a_imag * b_imag,
|
||||||
|
a_real * b_imag + a_imag * b_real);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T> struct scalar_quotient_op<std::complex<T>, std::complex<T>> {
|
||||||
|
enum {
|
||||||
|
Vectorizable = packet_traits<std::complex<T>>::HasDiv
|
||||||
|
};
|
||||||
|
typedef typename std::complex<T> result_type;
|
||||||
|
|
||||||
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const std::complex<T> operator() (const std::complex<T>& a, const std::complex<T>& b) const {
|
||||||
|
const T a_real = numext::real(a);
|
||||||
|
const T a_imag = numext::imag(a);
|
||||||
|
const T b_real = numext::real(b);
|
||||||
|
const T b_imag = numext::imag(b);
|
||||||
|
const T norm = T(1) / (b_real * b_real + b_imag * b_imag);
|
||||||
|
return std::complex<T>((a_real * b_real + a_imag * b_imag) * norm,
|
||||||
|
(a_imag * b_real - a_real * b_imag) * norm);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
#endif // EIGEN_COMPLEX_CUDA_H
|
@ -179,7 +179,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
|
|||||||
{
|
{
|
||||||
typedef typename Dest::Scalar ResScalar;
|
typedef typename Dest::Scalar ResScalar;
|
||||||
typedef typename Rhs::Scalar RhsScalar;
|
typedef typename Rhs::Scalar RhsScalar;
|
||||||
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
|
typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
|
||||||
|
|
||||||
eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
|
eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
|
||||||
|
|
||||||
|
@ -216,7 +216,7 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
|
|||||||
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
typedef internal::blas_traits<Rhs> RhsBlasTraits;
|
||||||
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
|
||||||
|
|
||||||
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
|
typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
|
||||||
|
|
||||||
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
|
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
|
||||||
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
|
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
|
||||||
|
@ -671,6 +671,14 @@ struct scalar_div_cost {
|
|||||||
enum { value = 8*NumTraits<T>::MulCost };
|
enum { value = 8*NumTraits<T>::MulCost };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename T,bool Vectorized>
|
||||||
|
struct scalar_div_cost<std::complex<T>, Vectorized> {
|
||||||
|
enum { value = 2*scalar_div_cost<T>::value
|
||||||
|
+ 6*NumTraits<T>::MulCost
|
||||||
|
+ 3*NumTraits<T>::AddCost
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template<bool Vectorized>
|
template<bool Vectorized>
|
||||||
struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
|
struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
|
||||||
|
@ -55,7 +55,12 @@ MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
|
|||||||
res[0] = atan2(coeff(j,i), coeff(k,i));
|
res[0] = atan2(coeff(j,i), coeff(k,i));
|
||||||
if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0)))
|
if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0)))
|
||||||
{
|
{
|
||||||
res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
|
if(res[0] > Scalar(0)) {
|
||||||
|
res[0] -= Scalar(EIGEN_PI);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
res[0] += Scalar(EIGEN_PI);
|
||||||
|
}
|
||||||
Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm();
|
Scalar s2 = Vector2(coeff(j,i), coeff(k,i)).norm();
|
||||||
res[1] = -atan2(s2, coeff(i,i));
|
res[1] = -atan2(s2, coeff(i,i));
|
||||||
}
|
}
|
||||||
@ -84,7 +89,12 @@ MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
|
|||||||
res[0] = atan2(coeff(j,k), coeff(k,k));
|
res[0] = atan2(coeff(j,k), coeff(k,k));
|
||||||
Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm();
|
Scalar c2 = Vector2(coeff(i,i), coeff(i,j)).norm();
|
||||||
if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0))) {
|
if((odd && res[0]<Scalar(0)) || ((!odd) && res[0]>Scalar(0))) {
|
||||||
res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
|
if(res[0] > Scalar(0)) {
|
||||||
|
res[0] -= Scalar(EIGEN_PI);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
res[0] += Scalar(EIGEN_PI);
|
||||||
|
}
|
||||||
res[1] = atan2(-coeff(i,k), -c2);
|
res[1] = atan2(-coeff(i,k), -c2);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -119,7 +119,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheLeft(
|
|||||||
{
|
{
|
||||||
*this *= Scalar(1)-tau;
|
*this *= Scalar(1)-tau;
|
||||||
}
|
}
|
||||||
else
|
else if(tau!=Scalar(0))
|
||||||
{
|
{
|
||||||
Map<typename internal::plain_row_type<PlainObject>::type> tmp(workspace,cols());
|
Map<typename internal::plain_row_type<PlainObject>::type> tmp(workspace,cols());
|
||||||
Block<Derived, EssentialPart::SizeAtCompileTime, Derived::ColsAtCompileTime> bottom(derived(), 1, 0, rows()-1, cols());
|
Block<Derived, EssentialPart::SizeAtCompileTime, Derived::ColsAtCompileTime> bottom(derived(), 1, 0, rows()-1, cols());
|
||||||
@ -156,7 +156,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheRight(
|
|||||||
{
|
{
|
||||||
*this *= Scalar(1)-tau;
|
*this *= Scalar(1)-tau;
|
||||||
}
|
}
|
||||||
else
|
else if(tau!=Scalar(0))
|
||||||
{
|
{
|
||||||
Map<typename internal::plain_col_type<PlainObject>::type> tmp(workspace,rows());
|
Map<typename internal::plain_col_type<PlainObject>::type> tmp(workspace,rows());
|
||||||
Block<Derived, Derived::RowsAtCompileTime, EssentialPart::SizeAtCompileTime> right(derived(), 0, 1, rows(), cols()-1);
|
Block<Derived, Derived::RowsAtCompileTime, EssentialPart::SizeAtCompileTime> right(derived(), 0, 1, rows(), cols()-1);
|
||||||
|
@ -879,7 +879,7 @@ struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_
|
|||||||
*
|
*
|
||||||
* \sa class FullPivLU
|
* \sa class FullPivLU
|
||||||
*/
|
*/
|
||||||
template<typename Derived> EIGEN_DEVICE_FUNC
|
template<typename Derived>
|
||||||
inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
|
inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
|
||||||
MatrixBase<Derived>::fullPivLu() const
|
MatrixBase<Derived>::fullPivLu() const
|
||||||
{
|
{
|
||||||
|
@ -327,7 +327,7 @@ struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename Dst
|
|||||||
*
|
*
|
||||||
* \sa computeInverseAndDetWithCheck()
|
* \sa computeInverseAndDetWithCheck()
|
||||||
*/
|
*/
|
||||||
template<typename Derived> EIGEN_DEVICE_FUNC
|
template<typename Derived>
|
||||||
inline const Inverse<Derived> MatrixBase<Derived>::inverse() const
|
inline const Inverse<Derived> MatrixBase<Derived>::inverse() const
|
||||||
{
|
{
|
||||||
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
|
EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
|
||||||
|
@ -584,7 +584,7 @@ struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assi
|
|||||||
*
|
*
|
||||||
* \sa class PartialPivLU
|
* \sa class PartialPivLU
|
||||||
*/
|
*/
|
||||||
template<typename Derived> EIGEN_DEVICE_FUNC
|
template<typename Derived>
|
||||||
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
|
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
|
||||||
MatrixBase<Derived>::partialPivLu() const
|
MatrixBase<Derived>::partialPivLu() const
|
||||||
{
|
{
|
||||||
@ -599,7 +599,7 @@ MatrixBase<Derived>::partialPivLu() const
|
|||||||
*
|
*
|
||||||
* \sa class PartialPivLU
|
* \sa class PartialPivLU
|
||||||
*/
|
*/
|
||||||
template<typename Derived> EIGEN_DEVICE_FUNC
|
template<typename Derived>
|
||||||
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
|
inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
|
||||||
MatrixBase<Derived>::lu() const
|
MatrixBase<Derived>::lu() const
|
||||||
{
|
{
|
||||||
|
@ -1,10 +1,13 @@
|
|||||||
|
|
||||||
find_package(BLAZE)
|
find_package(BLAZE)
|
||||||
find_package(Boost)
|
find_package(Boost COMPONENTS system)
|
||||||
if (BLAZE_FOUND AND Boost_FOUND)
|
if (BLAZE_FOUND AND Boost_FOUND)
|
||||||
include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS})
|
include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS})
|
||||||
btl_add_bench(btl_blaze main.cpp)
|
btl_add_bench(btl_blaze main.cpp)
|
||||||
|
# Note: The newest blaze version requires C++14.
|
||||||
|
# Ideally, we should set this depending on the version of Blaze we found
|
||||||
|
set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14)
|
||||||
if(BUILD_btl_blaze)
|
if(BUILD_btl_blaze)
|
||||||
target_link_libraries(btl_blaze ${Boost_LIBRARIES} ${Boost_system_LIBRARY} /opt/local/lib/libboost_system-mt.a )
|
target_link_libraries(btl_blaze ${Boost_LIBRARIES})
|
||||||
endif()
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -53,6 +53,33 @@ showing that the program works as expected:
|
|||||||
|
|
||||||
This implementation of \c makeCirculant is much simpler than \ref TopicNewExpressionType "defining a new expression" from scratch.
|
This implementation of \c makeCirculant is much simpler than \ref TopicNewExpressionType "defining a new expression" from scratch.
|
||||||
|
|
||||||
|
|
||||||
|
\section NullaryExpr_Indexing Example 2: indexing rows and columns
|
||||||
|
|
||||||
|
The goal here is to mimic MATLAB's ability to index a matrix through two vectors of indices referencing the rows and columns to be picked respectively, like this:
|
||||||
|
|
||||||
|
\snippet nullary_indexing.out main1
|
||||||
|
|
||||||
|
To this end, let us first write a nullary-functor storing references to the input matrix and to the two arrays of indices, and implementing the required \c operator()(i,j):
|
||||||
|
|
||||||
|
\snippet nullary_indexing.cpp functor
|
||||||
|
|
||||||
|
Then, let's create an \c indexing(A,rows,cols) function creating the nullary expression:
|
||||||
|
|
||||||
|
\snippet nullary_indexing.cpp function
|
||||||
|
|
||||||
|
Finally, here is an example of how this function can be used:
|
||||||
|
|
||||||
|
\snippet nullary_indexing.cpp main1
|
||||||
|
|
||||||
|
This straightforward implementation is already quite powerful as the row or column index arrays can also be expressions to perform offsetting, modulo, striding, reversal, etc.
|
||||||
|
|
||||||
|
\snippet nullary_indexing.cpp main2
|
||||||
|
|
||||||
|
and the output is:
|
||||||
|
|
||||||
|
\snippet nullary_indexing.out main2
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -14,3 +14,8 @@ foreach(example_src ${examples_SRCS})
|
|||||||
)
|
)
|
||||||
add_dependencies(all_examples ${example})
|
add_dependencies(all_examples ${example})
|
||||||
endforeach(example_src)
|
endforeach(example_src)
|
||||||
|
|
||||||
|
check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
|
||||||
|
if(EIGEN_COMPILER_SUPPORT_CPP11)
|
||||||
|
ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11")
|
||||||
|
endif()
|
66
doc/examples/nullary_indexing.cpp
Normal file
66
doc/examples/nullary_indexing.cpp
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
#include <Eigen/Core>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using namespace Eigen;
|
||||||
|
|
||||||
|
// [functor]
|
||||||
|
template<class ArgType, class RowIndexType, class ColIndexType>
|
||||||
|
class indexing_functor {
|
||||||
|
const ArgType &m_arg;
|
||||||
|
const RowIndexType &m_rowIndices;
|
||||||
|
const ColIndexType &m_colIndices;
|
||||||
|
public:
|
||||||
|
typedef Matrix<typename ArgType::Scalar,
|
||||||
|
RowIndexType::SizeAtCompileTime,
|
||||||
|
ColIndexType::SizeAtCompileTime,
|
||||||
|
ArgType::Flags&RowMajorBit?RowMajor:ColMajor,
|
||||||
|
RowIndexType::MaxSizeAtCompileTime,
|
||||||
|
ColIndexType::MaxSizeAtCompileTime> MatrixType;
|
||||||
|
|
||||||
|
indexing_functor(const ArgType& arg, const RowIndexType& row_indices, const ColIndexType& col_indices)
|
||||||
|
: m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices)
|
||||||
|
{}
|
||||||
|
|
||||||
|
const typename ArgType::Scalar& operator() (Index row, Index col) const {
|
||||||
|
return m_arg(m_rowIndices[row], m_colIndices[col]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// [functor]
|
||||||
|
|
||||||
|
// [function]
|
||||||
|
template <class ArgType, class RowIndexType, class ColIndexType>
|
||||||
|
CwiseNullaryOp<indexing_functor<ArgType,RowIndexType,ColIndexType>, typename indexing_functor<ArgType,RowIndexType,ColIndexType>::MatrixType>
|
||||||
|
indexing(const Eigen::MatrixBase<ArgType>& arg, const RowIndexType& row_indices, const ColIndexType& col_indices)
|
||||||
|
{
|
||||||
|
typedef indexing_functor<ArgType,RowIndexType,ColIndexType> Func;
|
||||||
|
typedef typename Func::MatrixType MatrixType;
|
||||||
|
return MatrixType::NullaryExpr(row_indices.size(), col_indices.size(), Func(arg.derived(), row_indices, col_indices));
|
||||||
|
}
|
||||||
|
// [function]
|
||||||
|
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
std::cout << "[main1]\n";
|
||||||
|
Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4);
|
||||||
|
Array3i ri(1,2,1);
|
||||||
|
ArrayXi ci(6); ci << 3,2,1,0,0,2;
|
||||||
|
Eigen::MatrixXi B = indexing(A, ri, ci);
|
||||||
|
std::cout << "A =" << std::endl;
|
||||||
|
std::cout << A << std::endl << std::endl;
|
||||||
|
std::cout << "A([" << ri.transpose() << "], [" << ci.transpose() << "]) =" << std::endl;
|
||||||
|
std::cout << B << std::endl;
|
||||||
|
std::cout << "[main1]\n";
|
||||||
|
|
||||||
|
std::cout << "[main2]\n";
|
||||||
|
B = indexing(A, ri+1, ci);
|
||||||
|
std::cout << "A(ri+1,ci) =" << std::endl;
|
||||||
|
std::cout << B << std::endl << std::endl;
|
||||||
|
#if __cplusplus >= 201103L
|
||||||
|
B = indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3));
|
||||||
|
std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << std::endl;
|
||||||
|
std::cout << B << std::endl << std::endl;
|
||||||
|
#endif
|
||||||
|
std::cout << "[main2]\n";
|
||||||
|
}
|
||||||
|
|
@ -417,6 +417,7 @@ void cholesky_faillure_cases()
|
|||||||
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix());
|
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix());
|
||||||
VERIFY(ldlt.info()==NumericalIssue);
|
VERIFY(ldlt.info()==NumericalIssue);
|
||||||
}
|
}
|
||||||
|
#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE_SSE2)
|
||||||
{
|
{
|
||||||
mat.resize(3,3);
|
mat.resize(3,3);
|
||||||
mat << -1, -3, 3,
|
mat << -1, -3, 3,
|
||||||
@ -426,6 +427,7 @@ void cholesky_faillure_cases()
|
|||||||
VERIFY(ldlt.info()==NumericalIssue);
|
VERIFY(ldlt.info()==NumericalIssue);
|
||||||
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix());
|
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix());
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
mat.resize(3,3);
|
mat.resize(3,3);
|
||||||
mat << 1, 2, 3,
|
mat << 1, 2, 3,
|
||||||
|
@ -49,7 +49,8 @@ void check_inf_nan(bool dryrun) {
|
|||||||
VERIFY( !m.allFinite() );
|
VERIFY( !m.allFinite() );
|
||||||
VERIFY( m.hasNaN() );
|
VERIFY( m.hasNaN() );
|
||||||
}
|
}
|
||||||
m(4) /= T(0.0);
|
T hidden_zero = (std::numeric_limits<T>::min)()*(std::numeric_limits<T>::min)();
|
||||||
|
m(4) /= hidden_zero;
|
||||||
if(dryrun)
|
if(dryrun)
|
||||||
{
|
{
|
||||||
std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n";
|
std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n";
|
||||||
|
@ -365,6 +365,7 @@ template<typename Scalar> void packetmath_real()
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (PacketTraits::HasTanh) {
|
if (PacketTraits::HasTanh) {
|
||||||
|
// NOTE this test might fail with GCC prior to 6.3, see MathFunctionsImpl.h for details.
|
||||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||||
packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h;
|
packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h;
|
||||||
h.store(data2, internal::ptanh(h.load(data1)));
|
h.store(data2, internal::ptanh(h.load(data1)));
|
||||||
|
@ -213,7 +213,8 @@ void test_product_small()
|
|||||||
{
|
{
|
||||||
for(int i = 0; i < g_repeat; i++) {
|
for(int i = 0; i < g_repeat; i++) {
|
||||||
CALL_SUBTEST_1( product(Matrix<float, 3, 2>()) );
|
CALL_SUBTEST_1( product(Matrix<float, 3, 2>()) );
|
||||||
CALL_SUBTEST_2( product(Matrix<int, 3, 5>()) );
|
CALL_SUBTEST_2( product(Matrix<int, 3, 17>()) );
|
||||||
|
CALL_SUBTEST_8( product(Matrix<double, 3, 17>()) );
|
||||||
CALL_SUBTEST_3( product(Matrix3d()) );
|
CALL_SUBTEST_3( product(Matrix3d()) );
|
||||||
CALL_SUBTEST_4( product(Matrix4d()) );
|
CALL_SUBTEST_4( product(Matrix4d()) );
|
||||||
CALL_SUBTEST_5( product(Matrix4f()) );
|
CALL_SUBTEST_5( product(Matrix4f()) );
|
||||||
|
@ -14,6 +14,8 @@ template<>
|
|||||||
Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); }
|
Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); }
|
||||||
template<>
|
template<>
|
||||||
Array4d four_denorms() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); }
|
Array4d four_denorms() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); }
|
||||||
|
template<typename T>
|
||||||
|
Array<T,4,1> four_denorms() { return four_denorms<double>().cast<T>(); }
|
||||||
|
|
||||||
template<typename MatrixType>
|
template<typename MatrixType>
|
||||||
void svd_fill_random(MatrixType &m, int Option = 0)
|
void svd_fill_random(MatrixType &m, int Option = 0)
|
||||||
|
@ -168,39 +168,20 @@ struct GpuDevice {
|
|||||||
return stream_->stream();
|
return stream_->stream();
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->allocate(num_bytes);
|
return stream_->allocate(num_bytes);
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return NULL;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
stream_->deallocate(buffer);
|
stream_->deallocate(buffer);
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* scratchpad() const {
|
EIGEN_STRONG_INLINE void* scratchpad() const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->scratchpad();
|
return stream_->scratchpad();
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return NULL;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const {
|
EIGEN_STRONG_INLINE unsigned int* semaphore() const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->semaphore();
|
return stream_->semaphore();
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return NULL;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||||
@ -210,30 +191,22 @@ struct GpuDevice {
|
|||||||
EIGEN_UNUSED_VARIABLE(err)
|
EIGEN_UNUSED_VARIABLE(err)
|
||||||
assert(err == cudaSuccess);
|
assert(err == cudaSuccess);
|
||||||
#else
|
#else
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
|
EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
cudaError_t err =
|
cudaError_t err =
|
||||||
cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream());
|
cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream());
|
||||||
EIGEN_UNUSED_VARIABLE(err)
|
EIGEN_UNUSED_VARIABLE(err)
|
||||||
assert(err == cudaSuccess);
|
assert(err == cudaSuccess);
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
|
EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
cudaError_t err =
|
cudaError_t err =
|
||||||
cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream());
|
cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream());
|
||||||
EIGEN_UNUSED_VARIABLE(err)
|
EIGEN_UNUSED_VARIABLE(err)
|
||||||
assert(err == cudaSuccess);
|
assert(err == cudaSuccess);
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
||||||
@ -242,21 +215,21 @@ struct GpuDevice {
|
|||||||
EIGEN_UNUSED_VARIABLE(err)
|
EIGEN_UNUSED_VARIABLE(err)
|
||||||
assert(err == cudaSuccess);
|
assert(err == cudaSuccess);
|
||||||
#else
|
#else
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const {
|
EIGEN_STRONG_INLINE size_t numThreads() const {
|
||||||
// FIXME
|
// FIXME
|
||||||
return 32;
|
return 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
|
EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
|
||||||
// FIXME
|
// FIXME
|
||||||
return 48*1024;
|
return 48*1024;
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
|
EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
|
||||||
// We won't try to take advantage of the l2 cache for the time being, and
|
// We won't try to take advantage of the l2 cache for the time being, and
|
||||||
// there is no l3 cache on cuda devices.
|
// there is no l3 cache on cuda devices.
|
||||||
return firstLevelCacheSize();
|
return firstLevelCacheSize();
|
||||||
@ -276,56 +249,26 @@ struct GpuDevice {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
|
EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->deviceProperties().multiProcessorCount;
|
return stream_->deviceProperties().multiProcessorCount;
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
|
EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->deviceProperties().maxThreadsPerBlock;
|
return stream_->deviceProperties().maxThreadsPerBlock;
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
|
EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->deviceProperties().maxThreadsPerMultiProcessor;
|
return stream_->deviceProperties().maxThreadsPerMultiProcessor;
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
|
EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->deviceProperties().sharedMemPerBlock;
|
return stream_->deviceProperties().sharedMemPerBlock;
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->deviceProperties().major;
|
return stream_->deviceProperties().major;
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int minorDeviceVersion() const {
|
EIGEN_STRONG_INLINE int minorDeviceVersion() const {
|
||||||
#ifndef __CUDA_ARCH__
|
|
||||||
return stream_->deviceProperties().minor;
|
return stream_->deviceProperties().minor;
|
||||||
#else
|
|
||||||
eigen_assert(false && "The default device should be used instead to generate kernel code");
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
|
EIGEN_STRONG_INLINE int maxBlocks() const {
|
||||||
return max_blocks_;
|
return max_blocks_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,7 +189,12 @@ namespace Eigen
|
|||||||
res[0] = atan2(mat(J,K), mat(K,K));
|
res[0] = atan2(mat(J,K), mat(K,K));
|
||||||
Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm();
|
Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm();
|
||||||
if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) {
|
if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) {
|
||||||
res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
|
if(res[0] > Scalar(0)) {
|
||||||
|
res[0] -= Scalar(EIGEN_PI);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
res[0] += Scalar(EIGEN_PI);
|
||||||
|
}
|
||||||
res[1] = atan2(-mat(I,K), -c2);
|
res[1] = atan2(-mat(I,K), -c2);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -212,7 +217,12 @@ namespace Eigen
|
|||||||
res[0] = atan2(mat(J,I), mat(K,I));
|
res[0] = atan2(mat(J,I), mat(K,I));
|
||||||
if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0)))
|
if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0)))
|
||||||
{
|
{
|
||||||
res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
|
if(res[0] > Scalar(0)) {
|
||||||
|
res[0] -= Scalar(EIGEN_PI);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
res[0] += Scalar(EIGEN_PI);
|
||||||
|
}
|
||||||
Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm();
|
Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm();
|
||||||
res[1] = -atan2(s2, mat(I,I));
|
res[1] = -atan2(s2, mat(I,I));
|
||||||
}
|
}
|
||||||
|
@ -226,6 +226,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
|
|||||||
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
||||||
|
|
||||||
ei_add_test(cxx11_tensor_complex_cuda)
|
ei_add_test(cxx11_tensor_complex_cuda)
|
||||||
|
ei_add_test(cxx11_tensor_complex_cwise_ops_cuda)
|
||||||
ei_add_test(cxx11_tensor_reduction_cuda)
|
ei_add_test(cxx11_tensor_reduction_cuda)
|
||||||
ei_add_test(cxx11_tensor_argmax_cuda)
|
ei_add_test(cxx11_tensor_argmax_cuda)
|
||||||
ei_add_test(cxx11_tensor_cast_float16_cuda)
|
ei_add_test(cxx11_tensor_cast_float16_cuda)
|
||||||
|
97
unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
Normal file
97
unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
|
||||||
|
//
|
||||||
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#define EIGEN_TEST_NO_LONGDOUBLE
|
||||||
|
#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
|
||||||
|
#define EIGEN_USE_GPU
|
||||||
|
|
||||||
|
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
|
||||||
|
#include <cuda_fp16.h>
|
||||||
|
#endif
|
||||||
|
#include "main.h"
|
||||||
|
#include <unsupported/Eigen/CXX11/Tensor>
|
||||||
|
|
||||||
|
using Eigen::Tensor;
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
void test_cuda_complex_cwise_ops() {
|
||||||
|
const int kNumItems = 2;
|
||||||
|
std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
|
||||||
|
|
||||||
|
std::complex<T>* d_in1;
|
||||||
|
std::complex<T>* d_in2;
|
||||||
|
std::complex<T>* d_out;
|
||||||
|
cudaMalloc((void**)(&d_in1), complex_bytes);
|
||||||
|
cudaMalloc((void**)(&d_in2), complex_bytes);
|
||||||
|
cudaMalloc((void**)(&d_out), complex_bytes);
|
||||||
|
|
||||||
|
Eigen::CudaStreamDevice stream;
|
||||||
|
Eigen::GpuDevice gpu_device(&stream);
|
||||||
|
|
||||||
|
Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
|
||||||
|
d_in1, kNumItems);
|
||||||
|
Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
|
||||||
|
d_in2, kNumItems);
|
||||||
|
Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
|
||||||
|
d_out, kNumItems);
|
||||||
|
|
||||||
|
const std::complex<T> a(3.14f, 2.7f);
|
||||||
|
const std::complex<T> b(-10.6f, 1.4f);
|
||||||
|
|
||||||
|
gpu_in1.device(gpu_device) = gpu_in1.constant(a);
|
||||||
|
gpu_in2.device(gpu_device) = gpu_in2.constant(b);
|
||||||
|
|
||||||
|
enum CwiseOp {
|
||||||
|
Add = 0,
|
||||||
|
Sub,
|
||||||
|
Mul,
|
||||||
|
Div
|
||||||
|
};
|
||||||
|
|
||||||
|
Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
|
||||||
|
for (int op = Add; op <= Div; op++) {
|
||||||
|
std::complex<T> expected;
|
||||||
|
switch (static_cast<CwiseOp>(op)) {
|
||||||
|
case Add:
|
||||||
|
gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
|
||||||
|
expected = a + b;
|
||||||
|
break;
|
||||||
|
case Sub:
|
||||||
|
gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
|
||||||
|
expected = a - b;
|
||||||
|
break;
|
||||||
|
case Mul:
|
||||||
|
gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
|
||||||
|
expected = a * b;
|
||||||
|
break;
|
||||||
|
case Div:
|
||||||
|
gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
|
||||||
|
expected = a / b;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
|
||||||
|
gpu_device.stream()) == cudaSuccess);
|
||||||
|
assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
|
||||||
|
|
||||||
|
for (int i = 0; i < kNumItems; ++i) {
|
||||||
|
VERIFY_IS_APPROX(actual(i), expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cudaFree(d_in1);
|
||||||
|
cudaFree(d_in2);
|
||||||
|
cudaFree(d_out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void test_cxx11_tensor_complex_cwise_ops()
|
||||||
|
{
|
||||||
|
CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
|
||||||
|
CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user