Mirror of https://gitlab.com/libeigen/eigen.git

Merge with master.

commit 8328eec90d

CHANGELOG.md — new file, 2019 lines (file diff suppressed because it is too large).

CMakeLists.txt
@@ -29,6 +29,11 @@ if (POLICY CMP0146)
   cmake_policy(SET CMP0146 OLD)
 endif ()
 
+# Normalize DESTINATION paths
+if (POLICY CMP0177)
+  cmake_policy(SET CMP0177 NEW)
+endif ()
+
 #==============================================================================
 # CMake Project.
 #==============================================================================
@@ -254,7 +259,7 @@ if(EIGEN_BUILD_CMAKE_PACKAGE)
           DESTINATION ${CMAKEPACKAGE_INSTALL_DIR})
 
   # Add uninstall target
-  if(NOT TARGET uninstall)
+  if(NOT TARGET uninstall AND PROJECT_IS_TOP_LEVEL)
     add_custom_target ( uninstall
       COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake)
   endif()
Eigen/Core — 56 changed lines
@@ -192,45 +192,38 @@ using std::ptrdiff_t;
 #include "src/Core/arch/Default/BFloat16.h"
 #include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h"
 
-#if defined EIGEN_VECTORIZE_AVX512
+#if defined EIGEN_VECTORIZE_SSE
 #include "src/Core/arch/SSE/PacketMath.h"
+#include "src/Core/arch/SSE/Reductions.h"
+#include "src/Core/arch/SSE/Complex.h"
+#include "src/Core/arch/SSE/TypeCasting.h"
+#include "src/Core/arch/SSE/MathFunctions.h"
+#endif
+
+#if defined EIGEN_VECTORIZE_AVX
 #include "src/Core/arch/AVX/PacketMath.h"
+#include "src/Core/arch/AVX/Reductions.h"
+#include "src/Core/arch/AVX/Complex.h"
+#include "src/Core/arch/AVX/TypeCasting.h"
+#include "src/Core/arch/AVX/MathFunctions.h"
+#endif
+
+#if defined EIGEN_VECTORIZE_AVX512
 #include "src/Core/arch/AVX512/PacketMath.h"
+#include "src/Core/arch/AVX512/Reductions.h"
+#include "src/Core/arch/AVX512/Complex.h"
+#include "src/Core/arch/AVX512/TypeCasting.h"
+#include "src/Core/arch/AVX512/MathFunctions.h"
+#include "src/Core/arch/AVX512/TrsmKernel.h"
+#endif
+
 #if defined EIGEN_VECTORIZE_AVX512FP16
 #include "src/Core/arch/AVX512/PacketMathFP16.h"
-#endif
-#include "src/Core/arch/SSE/TypeCasting.h"
-#include "src/Core/arch/AVX/TypeCasting.h"
-#include "src/Core/arch/AVX512/TypeCasting.h"
-#if defined EIGEN_VECTORIZE_AVX512FP16
 #include "src/Core/arch/AVX512/TypeCastingFP16.h"
-#endif
-#include "src/Core/arch/SSE/Complex.h"
-#include "src/Core/arch/AVX/Complex.h"
-#include "src/Core/arch/AVX512/Complex.h"
-#include "src/Core/arch/SSE/MathFunctions.h"
-#include "src/Core/arch/AVX/MathFunctions.h"
-#include "src/Core/arch/AVX512/MathFunctions.h"
-#if defined EIGEN_VECTORIZE_AVX512FP16
 #include "src/Core/arch/AVX512/MathFunctionsFP16.h"
-#endif
-#include "src/Core/arch/AVX512/TrsmKernel.h"
-#elif defined EIGEN_VECTORIZE_AVX
-// Use AVX for floats and doubles, SSE for integers
-#include "src/Core/arch/SSE/PacketMath.h"
-#include "src/Core/arch/SSE/TypeCasting.h"
-#include "src/Core/arch/SSE/Complex.h"
-#include "src/Core/arch/AVX/PacketMath.h"
-#include "src/Core/arch/AVX/TypeCasting.h"
-#include "src/Core/arch/AVX/Complex.h"
-#include "src/Core/arch/SSE/MathFunctions.h"
-#include "src/Core/arch/AVX/MathFunctions.h"
-#elif defined EIGEN_VECTORIZE_SSE
-#include "src/Core/arch/SSE/PacketMath.h"
-#include "src/Core/arch/SSE/TypeCasting.h"
-#include "src/Core/arch/SSE/MathFunctions.h"
-#include "src/Core/arch/SSE/Complex.h"
-#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
+#endif
+
+#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
 #include "src/Core/arch/AltiVec/PacketMath.h"
 #include "src/Core/arch/AltiVec/TypeCasting.h"
 #include "src/Core/arch/AltiVec/MathFunctions.h"
@@ -358,6 +351,7 @@ using std::ptrdiff_t;
 #include "src/Core/SkewSymmetricMatrix3.h"
 #include "src/Core/Redux.h"
 #include "src/Core/Visitor.h"
+#include "src/Core/FindCoeff.h"
 #include "src/Core/Fuzzy.h"
 #include "src/Core/Swap.h"
 #include "src/Core/CommaInitializer.h"
Eigen/src/Core/CoreEvaluators.h

@@ -726,6 +726,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
                        Index count) const {
     constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
     PacketBlock<PacketType, NumPackets> packets;
+    for (Index i = 0; i < NumPackets; i++) packets.packet[i] = pzero(PacketType());
     Index offset = begin / SrcPacketSize;
     Index actualBegin = begin % SrcPacketSize;
     for (; offset < NumPackets; offset++) {
@@ -743,6 +744,7 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
                        Index count) const {
     constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
     PacketBlock<PacketType, NumPackets> packets;
+    for (Index i = 0; i < NumPackets; i++) packets.packet[i] = pzero(PacketType());
     Index offset = begin / SrcPacketSize;
     Index actualBegin = begin % SrcPacketSize;
     for (; offset < NumPackets; offset++) {
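The single added line in both hunks zero-initializes the packet block before the partial loads. A standalone sketch of why this matters (illustrative only, not Eigen internals): lanes that the partial copy never touches must still hold defined values before the whole block is read back.

#include <cstring>

// Toy model of the zero-init-then-partial-fill pattern above (assumes count <= 8).
void partial_fill_demo(float* dst, const float* src, int count) {
  float buffer[8] = {};                             // counterpart of the pzero() loop
  std::memcpy(buffer, src, count * sizeof(float));  // partial load; remaining lanes stay zero
  std::memcpy(dst, buffer, sizeof(buffer));         // the full block is now safe to read
}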
Eigen/src/Core/DenseCoeffsBase.h

@@ -45,10 +45,16 @@ class DenseCoeffsBase<Derived, ReadOnlyAccessors> : public EigenBase<Derived> {
   // - This is the return type of the coeff() method.
   // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references
   // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value).
+  // - The DirectAccessBit means exactly that the underlying data of coefficients can be directly accessed as a plain
+  // strided array, which means exactly that the underlying data of coefficients does exist in memory, which means
+  // exactly that the coefficients are const-referenceable, which means exactly that we can have coeff() return a const
+  // reference. For example, Map<const Matrix> has DirectAccessBit but not LvalueBit, so Map<const Matrix>.coeff()
+  // points to a const Scalar& which exists in memory, while coeffRef() is not allowed as it would not provide an
+  // lvalue. Notice that DirectAccessBit and LvalueBit are mutually orthogonal.
   // - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems
   // while the declaration of "const T", where T is a non-arithmetic type, does not. Always returning "const Scalar&" is
   // not possible, since the underlying expressions might not offer a valid address the reference could be referring to.
-  typedef std::conditional_t<bool(internal::traits<Derived>::Flags& LvalueBit), const Scalar&,
+  typedef std::conditional_t<bool(internal::traits<Derived>::Flags&(LvalueBit | DirectAccessBit)), const Scalar&,
                              std::conditional_t<internal::is_arithmetic<Scalar>::value, Scalar, const Scalar>>
       CoeffReturnType;
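For illustration (not part of the diff), the effect of widening the condition to LvalueBit | DirectAccessBit: a read-only Map has DirectAccessBit, so coeff() can now hand back a const reference into the mapped storage instead of a value copy.

#include <Eigen/Dense>

void coeff_return_type_demo(const float* data) {
  Eigen::Map<const Eigen::MatrixXf> m(data, 2, 2);  // DirectAccessBit set, LvalueBit not set
  const float& c = m.coeff(0, 0);  // CoeffReturnType is const float&: the coefficient lives in memory
  (void)c;
  // m.coeffRef(0, 0);  // still rejected: without LvalueBit there is no lvalue access
}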
Eigen/src/Core/FindCoeff.h — new file, 464 lines
@@ -0,0 +1,464 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2025 Charlie Schlosser <cs.schlosser@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_FIND_COEFF_H
#define EIGEN_FIND_COEFF_H

// IWYU pragma: private
#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

template <typename Scalar, int NaNPropagation, bool IsInteger = NumTraits<Scalar>::IsInteger>
struct max_coeff_functor {
  EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const {
    return candidate > incumbent;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const {
    return pcmp_lt(incumbent, candidate);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const {
    return predux_max(a);
  }
};
template <typename Scalar>
struct max_coeff_functor<Scalar, PropagateNaN, false> {
  EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const {
    return (candidate > incumbent) || ((candidate != candidate) && (incumbent == incumbent));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const {
    return pandnot(pcmp_lt_or_nan(incumbent, candidate), pisnan(incumbent));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const {
    return predux_max<PropagateNaN>(a);
  }
};

template <typename Scalar>
struct max_coeff_functor<Scalar, PropagateNumbers, false> {
  EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const {
    return (candidate > incumbent) || ((candidate == candidate) && (incumbent != incumbent));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const {
    return pandnot(pcmp_lt_or_nan(incumbent, candidate), pisnan(candidate));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const {
    return predux_max<PropagateNumbers>(a);
  }
};
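The NaN-propagating comparison above relies on IEEE self-comparison: x != x is true exactly when x is NaN. A standalone sketch of the same ordering (illustrative, using <cmath> instead of the packet primitives):

#include <cmath>
#include <cassert>

// A NaN candidate beats any non-NaN incumbent, so the first NaN encountered
// wins and the final result is NaN whenever one is present.
inline bool propagate_nan_max_compare(float incumbent, float candidate) {
  return (candidate > incumbent) || (std::isnan(candidate) && !std::isnan(incumbent));
}

void nan_compare_demo() {
  assert(propagate_nan_max_compare(1.0f, 2.0f));            // larger value wins
  assert(propagate_nan_max_compare(1.0f, std::nanf("")));   // NaN displaces a number
  assert(!propagate_nan_max_compare(std::nanf(""), 5.0f));  // nothing displaces a NaN
}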
template <typename Scalar, int NaNPropagation, bool IsInteger = NumTraits<Scalar>::IsInteger>
struct min_coeff_functor {
  EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const {
    return candidate < incumbent;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const {
    return pcmp_lt(candidate, incumbent);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const {
    return predux_min(a);
  }
};

template <typename Scalar>
struct min_coeff_functor<Scalar, PropagateNaN, false> {
  EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const {
    return (candidate < incumbent) || ((candidate != candidate) && (incumbent == incumbent));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const {
    return pandnot(pcmp_lt_or_nan(candidate, incumbent), pisnan(incumbent));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const {
    return predux_min<PropagateNaN>(a);
  }
};

template <typename Scalar>
struct min_coeff_functor<Scalar, PropagateNumbers, false> {
  EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const {
    return (candidate < incumbent) || ((candidate == candidate) && (incumbent != incumbent));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const {
    return pandnot(pcmp_lt_or_nan(candidate, incumbent), pisnan(candidate));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const {
    return predux_min<PropagateNumbers>(a);
  }
};
template <typename Scalar>
struct min_max_traits {
  static constexpr bool PacketAccess = packet_traits<Scalar>::Vectorizable;
};

template <typename Scalar, int NaNPropagation>
struct functor_traits<max_coeff_functor<Scalar, NaNPropagation>> : min_max_traits<Scalar> {};
template <typename Scalar, int NaNPropagation>
struct functor_traits<min_coeff_functor<Scalar, NaNPropagation>> : min_max_traits<Scalar> {};
template <typename Evaluator, typename Func, bool Linear, bool Vectorize>
struct find_coeff_loop;

template <typename Evaluator, typename Func>
struct find_coeff_loop<Evaluator, Func, /*Linear*/ false, /*Vectorize*/ false> {
  using Scalar = typename Evaluator::Scalar;
  static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& res, Index& outer, Index& inner) {
    Index outerSize = eval.outerSize();
    Index innerSize = eval.innerSize();

    /* initialization performed in calling function */
    /* result = eval.coeff(0, 0); */
    /* outer = 0; */
    /* inner = 0; */

    for (Index j = 0; j < outerSize; j++) {
      for (Index i = 0; i < innerSize; i++) {
        Scalar xprCoeff = eval.coeffByOuterInner(j, i);
        bool newRes = func.compareCoeff(res, xprCoeff);
        if (newRes) {
          outer = j;
          inner = i;
          res = xprCoeff;
        }
      }
    }
  }
};

template <typename Evaluator, typename Func>
struct find_coeff_loop<Evaluator, Func, /*Linear*/ true, /*Vectorize*/ false> {
  using Scalar = typename Evaluator::Scalar;
  static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& res, Index& index) {
    Index size = eval.size();

    /* initialization performed in calling function */
    /* result = eval.coeff(0); */
    /* index = 0; */

    for (Index k = 0; k < size; k++) {
      Scalar xprCoeff = eval.coeff(k);
      bool newRes = func.compareCoeff(res, xprCoeff);
      if (newRes) {
        index = k;
        res = xprCoeff;
      }
    }
  }
};
template <typename Evaluator, typename Func>
struct find_coeff_loop<Evaluator, Func, /*Linear*/ false, /*Vectorize*/ true> {
  using ScalarImpl = find_coeff_loop<Evaluator, Func, false, false>;
  using Scalar = typename Evaluator::Scalar;
  using Packet = typename Evaluator::Packet;
  static constexpr int PacketSize = unpacket_traits<Packet>::size;
  static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& result, Index& outer,
                                           Index& inner) {
    Index outerSize = eval.outerSize();
    Index innerSize = eval.innerSize();
    Index packetEnd = numext::round_down(innerSize, PacketSize);

    /* initialization performed in calling function */
    /* result = eval.coeff(0, 0); */
    /* outer = 0; */
    /* inner = 0; */

    bool checkPacket = false;

    for (Index j = 0; j < outerSize; j++) {
      Packet resultPacket = pset1<Packet>(result);
      for (Index i = 0; i < packetEnd; i += PacketSize) {
        Packet xprPacket = eval.template packetByOuterInner<Unaligned, Packet>(j, i);
        if (predux_any(func.comparePacket(resultPacket, xprPacket))) {
          outer = j;
          inner = i;
          result = func.predux(xprPacket);
          resultPacket = pset1<Packet>(result);
          checkPacket = true;
        }
      }

      for (Index i = packetEnd; i < innerSize; i++) {
        Scalar xprCoeff = eval.coeffByOuterInner(j, i);
        if (func.compareCoeff(result, xprCoeff)) {
          outer = j;
          inner = i;
          result = xprCoeff;
          checkPacket = false;
        }
      }
    }

    if (checkPacket) {
      result = eval.coeffByOuterInner(outer, inner);
      Index i_end = inner + PacketSize;
      for (Index i = inner; i < i_end; i++) {
        Scalar xprCoeff = eval.coeffByOuterInner(outer, i);
        if (func.compareCoeff(result, xprCoeff)) {
          inner = i;
          result = xprCoeff;
        }
      }
    }
  }
};
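A note on the checkPacket fix-up above: during the packet loop only the packet's start index is recorded, so if the final winner came from a packet, that packet is rescanned element by element to pin down the exact (first) position of the extreme value. A scalar model of the same idea (illustrative only; it ignores the scalar tail for brevity):

#include <algorithm>
#include <vector>
#include <cstddef>

std::size_t argmax_with_rescan(const std::vector<float>& v, std::size_t packet_size) {
  float best = v[0];
  std::size_t block = 0;
  bool from_packet = false;
  for (std::size_t k = 0; k + packet_size <= v.size(); k += packet_size) {
    float block_max = *std::max_element(v.begin() + k, v.begin() + k + packet_size);
    if (block_max > best) { best = block_max; block = k; from_packet = true; }
  }
  std::size_t idx = block;
  if (from_packet) {  // pin down the exact offset inside the winning block
    for (std::size_t k = block; k < block + packet_size; ++k)
      if (v[k] == best) { idx = k; break; }
  }
  return idx;
}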
template <typename Evaluator, typename Func>
struct find_coeff_loop<Evaluator, Func, /*Linear*/ true, /*Vectorize*/ true> {
  using ScalarImpl = find_coeff_loop<Evaluator, Func, true, false>;
  using Scalar = typename Evaluator::Scalar;
  using Packet = typename Evaluator::Packet;
  static constexpr int PacketSize = unpacket_traits<Packet>::size;
  static constexpr int Alignment = Evaluator::Alignment;

  static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& result, Index& index) {
    Index size = eval.size();
    Index packetEnd = numext::round_down(size, PacketSize);

    /* initialization performed in calling function */
    /* result = eval.coeff(0); */
    /* index = 0; */

    Packet resultPacket = pset1<Packet>(result);
    bool checkPacket = false;

    for (Index k = 0; k < packetEnd; k += PacketSize) {
      Packet xprPacket = eval.template packet<Alignment, Packet>(k);
      if (predux_any(func.comparePacket(resultPacket, xprPacket))) {
        index = k;
        result = func.predux(xprPacket);
        resultPacket = pset1<Packet>(result);
        checkPacket = true;
      }
    }

    for (Index k = packetEnd; k < size; k++) {
      Scalar xprCoeff = eval.coeff(k);
      if (func.compareCoeff(result, xprCoeff)) {
        index = k;
        result = xprCoeff;
        checkPacket = false;
      }
    }

    if (checkPacket) {
      result = eval.coeff(index);
      Index k_end = index + PacketSize;
      for (Index k = index; k < k_end; k++) {
        Scalar xprCoeff = eval.coeff(k);
        if (func.compareCoeff(result, xprCoeff)) {
          index = k;
          result = xprCoeff;
        }
      }
    }
  }
};
template <typename Derived>
struct find_coeff_evaluator : public evaluator<Derived> {
  using Base = evaluator<Derived>;
  using Scalar = typename Derived::Scalar;
  using Packet = typename packet_traits<Scalar>::type;
  static constexpr int Flags = Base::Flags;
  static constexpr bool IsRowMajor = bool(Flags & RowMajorBit);
  EIGEN_DEVICE_FUNC inline find_coeff_evaluator(const Derived& xpr) : Base(xpr), m_xpr(xpr) {}

  EIGEN_DEVICE_FUNC inline Scalar coeffByOuterInner(Index outer, Index inner) const {
    Index row = IsRowMajor ? outer : inner;
    Index col = IsRowMajor ? inner : outer;
    return Base::coeff(row, col);
  }
  template <int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC inline PacketType packetByOuterInner(Index outer, Index inner) const {
    Index row = IsRowMajor ? outer : inner;
    Index col = IsRowMajor ? inner : outer;
    return Base::template packet<LoadMode, PacketType>(row, col);
  }

  EIGEN_DEVICE_FUNC inline Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC inline Index outerSize() const { return m_xpr.outerSize(); }
  EIGEN_DEVICE_FUNC inline Index size() const { return m_xpr.size(); }

  const Derived& m_xpr;
};
template <typename Derived, typename Func>
struct find_coeff_impl {
  using Evaluator = find_coeff_evaluator<Derived>;
  static constexpr int Flags = Evaluator::Flags;
  static constexpr int Alignment = Evaluator::Alignment;
  static constexpr bool IsRowMajor = Derived::IsRowMajor;
  static constexpr int MaxInnerSizeAtCompileTime =
      IsRowMajor ? Derived::MaxColsAtCompileTime : Derived::MaxRowsAtCompileTime;
  static constexpr int MaxSizeAtCompileTime = Derived::MaxSizeAtCompileTime;

  using Scalar = typename Derived::Scalar;
  using Packet = typename Evaluator::Packet;

  static constexpr int PacketSize = unpacket_traits<Packet>::size;
  static constexpr bool Linearize = bool(Flags & LinearAccessBit);
  static constexpr bool DontVectorize =
      enum_lt_not_dynamic(Linearize ? MaxSizeAtCompileTime : MaxInnerSizeAtCompileTime, PacketSize);
  static constexpr bool Vectorize =
      !DontVectorize && bool(Flags & PacketAccessBit) && functor_traits<Func>::PacketAccess;

  using Loop = find_coeff_loop<Evaluator, Func, Linearize, Vectorize>;

  template <bool ForwardLinearAccess = Linearize, std::enable_if_t<!ForwardLinearAccess, bool> = true>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& outer,
                                                        Index& inner) {
    Evaluator eval(xpr);
    Loop::run(eval, func, res, outer, inner);
  }
  template <bool ForwardLinearAccess = Linearize, std::enable_if_t<ForwardLinearAccess, bool> = true>
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& outer,
                                                        Index& inner) {
    // where possible, use the linear loop and back-calculate the outer and inner indices
    Index index = 0;
    run(xpr, func, res, index);
    outer = index / xpr.innerSize();
    inner = index % xpr.innerSize();
  }
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& index) {
    Evaluator eval(xpr);
    Loop::run(eval, func, res, index);
  }
};
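As a concrete check of the back-calculation above (illustrative numbers): for a column-major 4x3 matrix, innerSize() is 4, so linear index 7 yields outer = 7 / 4 = 1 (column 1) and inner = 7 % 4 = 3 (row 3), matching coefficient (3, 1).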
template <typename Derived, typename IndexType, typename Func>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar findCoeff(const DenseBase<Derived>& mat, Func& func,
                                                                       IndexType* rowPtr, IndexType* colPtr) {
  eigen_assert(mat.rows() > 0 && mat.cols() > 0 && "you are using an empty matrix");
  using Scalar = typename DenseBase<Derived>::Scalar;
  using FindCoeffImpl = internal::find_coeff_impl<Derived, Func>;
  Index outer = 0;
  Index inner = 0;
  Scalar res = mat.coeff(0, 0);
  FindCoeffImpl::run(mat.derived(), func, res, outer, inner);
  *rowPtr = internal::convert_index<IndexType>(Derived::IsRowMajor ? outer : inner);
  if (colPtr) *colPtr = internal::convert_index<IndexType>(Derived::IsRowMajor ? inner : outer);
  return res;
}

template <typename Derived, typename IndexType, typename Func>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar findCoeff(const DenseBase<Derived>& mat, Func& func,
                                                                       IndexType* indexPtr) {
  eigen_assert(mat.size() > 0 && "you are using an empty matrix");
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  using Scalar = typename DenseBase<Derived>::Scalar;
  using FindCoeffImpl = internal::find_coeff_impl<Derived, Func>;
  Index index = 0;
  Scalar res = mat.coeff(0);
  FindCoeffImpl::run(mat.derived(), func, res, index);
  *indexPtr = internal::convert_index<IndexType>(index);
  return res;
}

} // namespace internal
/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
  * \returns the minimum of all coefficients of *this and puts in *row and *col its location.
  *
  * If there are multiple coefficients with the same extreme value, the location of the first instance is returned.
  *
  * In case \c *this contains NaN, NaNPropagation determines the behavior:
  *   NaNPropagation == PropagateFast : undefined
  *   NaNPropagation == PropagateNaN : result is NaN
  *   NaNPropagation == PropagateNumbers : result is the minimum of the elements that are not NaN
  * \warning the matrix must not be empty, otherwise an assertion is triggered.
  *
  * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
  */
template <typename Derived>
template <int NaNPropagation, typename IndexType>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff(IndexType* rowPtr,
                                                                                          IndexType* colPtr) const {
  using Func = internal::min_coeff_functor<Scalar, NaNPropagation>;
  Func func;
  return internal::findCoeff(derived(), func, rowPtr, colPtr);
}
/** \returns the minimum of all coefficients of *this and puts in *index its location.
  *
  * If there are multiple coefficients with the same extreme value, the location of the first instance is returned.
  *
  * In case \c *this contains NaN, NaNPropagation determines the behavior:
  *   NaNPropagation == PropagateFast : undefined
  *   NaNPropagation == PropagateNaN : result is NaN
  *   NaNPropagation == PropagateNumbers : result is the minimum of the elements that are not NaN
  * \warning the matrix must not be empty, otherwise an assertion is triggered.
  *
  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(),
  *     DenseBase::minCoeff()
  */
template <typename Derived>
template <int NaNPropagation, typename IndexType>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff(IndexType* indexPtr) const {
  using Func = internal::min_coeff_functor<Scalar, NaNPropagation>;
  Func func;
  return internal::findCoeff(derived(), func, indexPtr);
}
/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
  * \returns the maximum of all coefficients of *this and puts in *row and *col its location.
  *
  * If there are multiple coefficients with the same extreme value, the location of the first instance is returned.
  *
  * In case \c *this contains NaN, NaNPropagation determines the behavior:
  *   NaNPropagation == PropagateFast : undefined
  *   NaNPropagation == PropagateNaN : result is NaN
  *   NaNPropagation == PropagateNumbers : result is the maximum of the elements that are not NaN
  * \warning the matrix must not be empty, otherwise an assertion is triggered.
  *
  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
  */
template <typename Derived>
template <int NaNPropagation, typename IndexType>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff(IndexType* rowPtr,
                                                                                          IndexType* colPtr) const {
  using Func = internal::max_coeff_functor<Scalar, NaNPropagation>;
  Func func;
  return internal::findCoeff(derived(), func, rowPtr, colPtr);
}
/** \returns the maximum of all coefficients of *this and puts in *index its location.
  *
  * If there are multiple coefficients with the same extreme value, the location of the first instance is returned.
  *
  * In case \c *this contains NaN, NaNPropagation determines the behavior:
  *   NaNPropagation == PropagateFast : undefined
  *   NaNPropagation == PropagateNaN : result is NaN
  *   NaNPropagation == PropagateNumbers : result is the maximum of the elements that are not NaN
  * \warning the matrix must not be empty, otherwise an assertion is triggered.
  *
  * \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(),
  *     DenseBase::maxCoeff()
  */
template <typename Derived>
template <int NaNPropagation, typename IndexType>
EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff(IndexType* indexPtr) const {
  using Func = internal::max_coeff_functor<Scalar, NaNPropagation>;
  Func func;
  return internal::findCoeff(derived(), func, indexPtr);
}

} // namespace Eigen

#endif // EIGEN_FIND_COEFF_H
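A short usage sketch of the user-facing entry points this file backs (standard DenseBase API; the values are illustrative):

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXf m(2, 3);
  m << 3, 1, 4,
       1, 5, 9;
  Eigen::Index row, col, idx;
  float mx = m.maxCoeff<Eigen::PropagateNaN>(&row, &col);  // 9 at (1, 2)
  float mn = m.reshaped().minCoeff(&idx);                  // 1 at linear index 1 (column-major order)
  std::cout << mx << " " << mn << " " << row << " " << col << " " << idx << "\n";
}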
Eigen/src/Core/GenericPacketMath.h

@@ -375,7 +375,7 @@ EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
   return a && b;
 }
 
-// In the generic case, memset to all one bits.
+// In the generic packet case, memset to all one bits.
 template <typename Packet, typename EnableIf = void>
 struct ptrue_impl {
   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
@@ -385,19 +385,16 @@ struct ptrue_impl {
   }
 };
 
+// Use a value of one for scalars.
+template <typename Scalar>
+struct ptrue_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
+  static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); }
+};
+
 // For booleans, we can only directly set a valid `bool` value to avoid UB.
 template <>
 struct ptrue_impl<bool, void> {
-  static EIGEN_DEVICE_FUNC inline bool run(const bool& /*a*/) { return true; }
-};
-
-// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
-// Although this is technically not a valid bitmask, the scalar path for pselect
-// uses a comparison to zero, so this should still work in most cases. We don't
-// have another option, since the scalar type requires initialization.
-template <typename T>
-struct ptrue_impl<T, std::enable_if_t<is_scalar<T>::value && NumTraits<T>::RequireInitialization>> {
-  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(1); }
+  static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; }
 };
 
 /** \internal \returns one bits. */
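For intuition, a sketch (not Eigen code) of why the generic memset path is fine for float masks but can never be used for bool:

#include <cstdint>
#include <cstring>

float all_ones_float_mask() {
  std::uint32_t bits = 0xffffffffu;   // what "memset to all one bits" produces
  float f;
  std::memcpy(&f, &bits, sizeof(f));  // a NaN bit pattern, but perfectly valid as a bitmask
  return f;
}

bool ptrue_for_bool() {
  return true;  // memset-ing 0xff into a bool and reading it back would be UB
}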
@@ -406,7 +403,7 @@ EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
   return ptrue_impl<Packet>::run(a);
 }
 
-// In the general case, memset to zero.
+// In the general packet case, memset to zero.
 template <typename Packet, typename EnableIf = void>
 struct pzero_impl {
   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
@@ -608,7 +605,7 @@ EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, con
 
 /** \internal \returns the min or max of \a a and \a b (coeff-wise)
     If either \a a or \a b are NaN, the result is implementation defined. */
-template <int NaNPropagation>
+template <int NaNPropagation, bool IsInteger>
 struct pminmax_impl {
   template <typename Packet, typename Op>
   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
@@ -619,7 +616,7 @@ struct pminmax_impl {
 /** \internal \returns the min or max of \a a and \a b (coeff-wise)
     If either \a a or \a b are NaN, NaN is returned. */
 template <>
-struct pminmax_impl<PropagateNaN> {
+struct pminmax_impl<PropagateNaN, false> {
   template <typename Packet, typename Op>
   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
     Packet not_nan_mask_a = pcmp_eq(a, a);
@@ -632,7 +629,7 @@ struct pminmax_impl<PropagateNaN> {
     If both \a a and \a b are NaN, NaN is returned.
     Equivalent to std::fmin(a, b). */
 template <>
-struct pminmax_impl<PropagateNumbers> {
+struct pminmax_impl<PropagateNumbers, false> {
   template <typename Packet, typename Op>
   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
     Packet not_nan_mask_a = pcmp_eq(a, a);
@@ -654,7 +651,8 @@ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
     NaNPropagation determines the NaN propagation semantics. */
 template <int NaNPropagation, typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
-  return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
+  constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
+  return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
 }
 
 /** \internal \returns the max of \a a and \a b (coeff-wise)
@@ -668,7 +666,8 @@ EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
     NaNPropagation determines the NaN propagation semantics. */
 template <int NaNPropagation, typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
-  return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
+  constexpr bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger;
+  return pminmax_impl<NaNPropagation, IsInteger>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
 }
 
 /** \internal \returns the absolute value of \a a */
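The new IsInteger parameter lets integer packets fall through to the primary template, since integer types have no NaN to propagate. A scalar model of the two paths (illustrative, not the real kernels):

#include <cmath>

float pmin_propagate_numbers(float a, float b) {
  // mimics the not_nan_mask/pselect chain of pminmax_impl<PropagateNumbers, false>
  if (std::isnan(a)) return b;
  if (std::isnan(b)) return a;
  return a < b ? a : b;
}

int pmin_integer(int a, int b) {
  return a < b ? a : b;  // IsInteger == true: no NaN masks needed at all
}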
@@ -873,17 +872,29 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_trait
   return a;
 }
 
+template <typename Packet, typename EnableIf = void>
+struct peven_mask_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) {
+    typedef typename unpacket_traits<Packet>::type Scalar;
+    const size_t n = unpacket_traits<Packet>::size;
+    EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
+    for (size_t i = 0; i < n; ++i) {
+      memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
+    }
+    return ploadu<Packet>(elements);
+  }
+};
+
+template <typename Scalar>
+struct peven_mask_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value>> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); }
+};
+
 /** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0),
     where x is the value of all 1-bits. */
 template <typename Packet>
-EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& /*a*/) {
-  typedef typename unpacket_traits<Packet>::type Scalar;
-  const size_t n = unpacket_traits<Packet>::size;
-  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
-  for (size_t i = 0; i < n; ++i) {
-    memset(elements + i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
-  }
-  return ploadu<Packet>(elements);
+EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) {
+  return peven_mask_impl<Packet>::run(a);
 }
 
 /** \internal copy the packet \a from to \a *to, \a to must be properly aligned */
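What peven_mask produces, in scalar terms (illustrative model): a lane mask that is all-ones in even lanes and all-zeros in odd lanes, e.g. for a four-float packet:

#include <cstdint>

void peven_mask_model(std::uint32_t out[4]) {
  for (int i = 0; i < 4; ++i) out[i] = (i % 2 == 0) ? 0xffffffffu : 0u;
}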
@@ -1244,26 +1255,46 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
   typedef typename unpacket_traits<Packet>::type Scalar;
-  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
+  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<Scalar>)));
 }
 
-template <int NaNPropagation, typename Packet>
-EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
-  typedef typename unpacket_traits<Packet>::type Scalar;
-  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
-}
-
-/** \internal \returns the min of the elements of \a a */
+/** \internal \returns the max of the elements of \a a */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
   typedef typename unpacket_traits<Packet>::type Scalar;
-  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
+  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<Scalar>)));
 }
 
+template <int NaNPropagation, typename Packet>
+struct predux_min_max_helper_impl {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits<Scalar>::IsInteger;
+  template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
+  static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
+    return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
+  }
+  template <bool UsePredux = UsePredux_, std::enable_if_t<!UsePredux, bool> = true>
+  static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
+    return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
+  }
+  template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
+  static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) {
+    return predux_min(a);
+  }
+  template <bool UsePredux = UsePredux_, std::enable_if_t<UsePredux, bool> = true>
+  static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) {
+    return predux_max(a);
+  }
+};
+
+template <int NaNPropagation, typename Packet>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
+  return predux_min_max_helper_impl<NaNPropagation, Packet>::run_min(a);
+}
+
 template <int NaNPropagation, typename Packet>
 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
-  typedef typename unpacket_traits<Packet>::type Scalar;
-  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
+  return predux_min_max_helper_impl<NaNPropagation, Packet>::run_max(a);
 }
 
 #undef EIGEN_BINARY_OP_NAN_PROPAGATION
Eigen/src/Core/MathFunctions.h

@@ -182,10 +182,6 @@ struct imag_ref_retval {
   typedef typename NumTraits<Scalar>::Real& type;
 };
 
-// implementation in MathFunctionsImpl.h
-template <typename Mask, bool is_built_in_float = std::is_floating_point<Mask>::value>
-struct scalar_select_mask;
-
 }  // namespace internal
 
 namespace numext {
@@ -211,9 +207,9 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar&
   return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
 }
 
-template <typename Scalar, typename Mask>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar select(const Mask& mask, const Scalar& a, const Scalar& b) {
-  return internal::scalar_select_mask<Mask>::run(mask) ? b : a;
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar select(const Scalar& mask, const Scalar& a, const Scalar& b) {
+  return numext::is_exactly_zero(mask) ? b : a;
 }
 
 }  // namespace numext
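The simplified overload makes the scalar select semantics easy to state: a non-zero mask picks a, an exactly-zero mask picks b. A small sketch, assuming the overload above is callable as shown:

#include <Eigen/Core>
#include <cassert>

void select_demo() {
  using Eigen::numext::select;
  assert(select(1.0f, 2.0f, 3.0f) == 2.0f);  // non-zero mask -> a
  assert(select(0.0f, 2.0f, 3.0f) == 3.0f);  // zero mask -> b
}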
Eigen/src/Core/MathFunctionsImpl.h

@@ -256,48 +256,6 @@ EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) {
   return ComplexT(numext::log(a), b);
 }
 
-// For generic scalars, use ternary select.
-template <typename Mask>
-struct scalar_select_mask<Mask, /*is_built_in_float*/ false> {
-  static EIGEN_DEVICE_FUNC inline bool run(const Mask& mask) { return numext::is_exactly_zero(mask); }
-};
-
-// For built-in float mask, bitcast the mask to its integer counterpart and use ternary select.
-template <typename Mask>
-struct scalar_select_mask<Mask, /*is_built_in_float*/ true> {
-  using IntegerType = typename numext::get_integer_by_size<sizeof(Mask)>::unsigned_type;
-  static EIGEN_DEVICE_FUNC inline bool run(const Mask& mask) {
-    return numext::is_exactly_zero(numext::bit_cast<IntegerType>(std::abs(mask)));
-  }
-};
-
-template <int Size = sizeof(long double)>
-struct ldbl_select_mask {
-  static constexpr int MantissaDigits = std::numeric_limits<long double>::digits;
-  static constexpr int NumBytes = (MantissaDigits == 64 ? 80 : 128) / CHAR_BIT;
-  static EIGEN_DEVICE_FUNC inline bool run(const long double& mask) {
-    const uint8_t* mask_bytes = reinterpret_cast<const uint8_t*>(&mask);
-    for (Index i = 0; i < NumBytes; i++) {
-      if (mask_bytes[i] != 0) return false;
-    }
-    return true;
-  }
-};
-
-template <>
-struct ldbl_select_mask<sizeof(double)> : scalar_select_mask<double> {};
-
-template <>
-struct scalar_select_mask<long double, true> : ldbl_select_mask<> {};
-
-template <typename RealMask>
-struct scalar_select_mask<std::complex<RealMask>, false> {
-  using impl = scalar_select_mask<RealMask>;
-  static EIGEN_DEVICE_FUNC inline bool run(const std::complex<RealMask>& mask) {
-    return impl::run(numext::real(mask)) && impl::run(numext::imag(mask));
-  }
-};
-
 }  // end namespace internal
 
 }  // end namespace Eigen
Eigen/src/Core/ProductEvaluators.h

@@ -851,7 +851,7 @@ struct generic_product_impl<Lhs, Rhs, SelfAdjointShape, DenseShape, ProductTag>
 
   template <typename Dest>
   static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
-    selfadjoint_product_impl<typename Lhs::MatrixType, Lhs::Mode, false, Rhs, 0, Rhs::IsVectorAtCompileTime>::run(
+    selfadjoint_product_impl<typename Lhs::MatrixType, Lhs::Mode, false, Rhs, 0, Rhs::ColsAtCompileTime == 1>::run(
         dst, lhs.nestedExpression(), rhs, alpha);
   }
 };
@@ -863,7 +863,7 @@ struct generic_product_impl<Lhs, Rhs, DenseShape, SelfAdjointShape, ProductTag>
 
   template <typename Dest>
   static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
-    selfadjoint_product_impl<Lhs, 0, Lhs::IsVectorAtCompileTime, typename Rhs::MatrixType, Rhs::Mode, false>::run(
+    selfadjoint_product_impl<Lhs, 0, Lhs::RowsAtCompileTime == 1, typename Rhs::MatrixType, Rhs::Mode, false>::run(
         dst, lhs, rhs.nestedExpression(), alpha);
   }
 };
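A note on why the condition changes (illustrative reasoning, not from the commit message): IsVectorAtCompileTime is true for both row and column vectors, while this template argument must say whether the dense operand is a single column (respectively a single row) for the vector kernel to apply.

#include <Eigen/Dense>

void selfadjoint_shape_demo() {
  using RowVec4f = Eigen::Matrix<float, 1, 4>;
  static_assert(RowVec4f::IsVectorAtCompileTime == 1, "row vectors count as vectors");
  static_assert(RowVec4f::ColsAtCompileTime == 4, "but a row vector is not a single column");
}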
Eigen/src/Core/SolverBase.h

@@ -78,6 +78,14 @@ class SolverBase : public EigenBase<Derived> {
   template <typename Derived_>
   friend struct internal::solve_assertion;
 
+  ComputationInfo info() const {
+    // CRTP static dispatch: calls the 'info()' method on the derived class.
+    // Derived must implement 'ComputationInfo info() const'.
+    // If not implemented, name lookup falls back to this base method, causing
+    // infinite recursion (detectable by -Winfinite-recursion).
+    return derived().info();
+  }
+
   enum {
     RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
     ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
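A minimal CRTP sketch of the dispatch documented in the added comment (all names are hypothetical, not Eigen's):

enum ToyInfo { ToySuccess, ToyFailure };

template <typename Derived>
struct ToySolverBase {
  // Recurses forever if Derived forgets to implement info() -- the hazard the
  // comment in the diff warns about.
  ToyInfo info() const { return static_cast<const Derived&>(*this).info(); }
};

struct ToySolver : ToySolverBase<ToySolver> {
  ToyInfo info() const { return ToySuccess; }  // found first by name lookup on the derived type
};

ToyInfo crtp_demo() {
  ToySolver s;
  return s.info();  // statically dispatches to ToySolver::info(), no virtual call
}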
Eigen/src/Core/VectorwiseOp.h

@@ -603,10 +603,9 @@ class VectorwiseOp {
   /** Returns the expression where each subvector is the product of the vector \a other
     * by the corresponding subvector of \c *this */
   template <typename OtherDerived>
-  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
-      CwiseBinaryOp<internal::scalar_product_op<Scalar>, const ExpressionTypeNestedCleaned,
-                    const typename ExtendedType<OtherDerived>::Type> EIGEN_DEVICE_FUNC
-      operator*(const DenseBase<OtherDerived>& other) const {
+  EIGEN_DEVICE_FUNC CwiseBinaryOp<internal::scalar_product_op<Scalar, typename OtherDerived::Scalar>,
+                                  const ExpressionTypeNestedCleaned, const typename ExtendedType<OtherDerived>::Type>
+  operator*(const DenseBase<OtherDerived>& other) const {
     EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
     EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
     EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
@@ -616,8 +615,8 @@ class VectorwiseOp {
   /** Returns the expression where each subvector is the quotient of the corresponding
     * subvector of \c *this by the vector \a other */
   template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const ExpressionTypeNestedCleaned,
-                                  const typename ExtendedType<OtherDerived>::Type>
+  EIGEN_DEVICE_FUNC CwiseBinaryOp<internal::scalar_quotient_op<Scalar, typename OtherDerived::Scalar>,
+                                  const ExpressionTypeNestedCleaned, const typename ExtendedType<OtherDerived>::Type>
   operator/(const DenseBase<OtherDerived>& other) const {
     EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
     EIGEN_STATIC_ASSERT_ARRAYXPR(ExpressionType)
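Besides removing the duplicated EIGEN_DEVICE_FUNC, the rewritten signatures name the second scalar type explicitly (scalar_product_op<Scalar, typename OtherDerived::Scalar>), matching how these broadcasting operators are used. A same-scalar usage sketch:

#include <Eigen/Dense>

void vectorwise_demo() {
  Eigen::ArrayXXf a(2, 3);
  a << 1, 2, 3,
       4, 5, 6;
  Eigen::ArrayXf v(2);
  v << 10, 100;
  Eigen::ArrayXXf scaled = a.colwise() * v;  // each column multiplied coefficient-wise by v
  (void)scaled;
}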
Eigen/src/Core/Visitor.h

@@ -384,173 +384,6 @@ EIGEN_DEVICE_FUNC void DenseBase<Derived>::visit(Visitor& visitor) const {
 
 namespace internal {
 
-/** \internal
- * \brief Base class to implement min and max visitors
- */
-template <typename Derived>
-struct coeff_visitor {
-  // default initialization to avoid countless invalid maybe-uninitialized warnings by gcc
-  EIGEN_DEVICE_FUNC coeff_visitor() : row(-1), col(-1), res(0) {}
-  typedef typename Derived::Scalar Scalar;
-  Index row, col;
-  Scalar res;
-  EIGEN_DEVICE_FUNC inline void init(const Scalar& value, Index i, Index j) {
-    res = value;
-    row = i;
-    col = j;
-  }
-};
-
-template <typename Scalar, int NaNPropagation, bool is_min = true>
-struct minmax_compare {
-  typedef typename packet_traits<Scalar>::type Packet;
-  static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a < b; }
-  static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_min<NaNPropagation>(p); }
-};
-
-template <typename Scalar, int NaNPropagation>
-struct minmax_compare<Scalar, NaNPropagation, false> {
-  typedef typename packet_traits<Scalar>::type Packet;
-  static EIGEN_DEVICE_FUNC inline bool compare(Scalar a, Scalar b) { return a > b; }
-  static EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& p) { return predux_max<NaNPropagation>(p); }
-};
-
-// Default implementation used by non-floating types, where we do not
-// need special logic for NaN handling.
-template <typename Derived, bool is_min, int NaNPropagation,
-          bool isInt = NumTraits<typename Derived::Scalar>::IsInteger>
-struct minmax_coeff_visitor : coeff_visitor<Derived> {
-  using Scalar = typename Derived::Scalar;
-  using Packet = typename packet_traits<Scalar>::type;
-  using Comparator = minmax_compare<Scalar, NaNPropagation, is_min>;
-  static constexpr Index PacketSize = packet_traits<Scalar>::size;
-
-  EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index i, Index j) {
-    if (Comparator::compare(value, this->res)) {
-      this->res = value;
-      this->row = i;
-      this->col = j;
-    }
-  }
-  EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index i, Index j) {
-    Scalar value = Comparator::predux(p);
-    if (Comparator::compare(value, this->res)) {
-      const Packet range = preverse(plset<Packet>(Scalar(1)));
-      Packet mask = pcmp_eq(pset1<Packet>(value), p);
-      Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
-      this->res = value;
-      this->row = Derived::IsRowMajor ? i : i + max_idx;
-      this->col = Derived::IsRowMajor ? j + max_idx : j;
-    }
-  }
-  EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index i, Index j) {
-    Scalar value = Comparator::predux(p);
-    const Packet range = preverse(plset<Packet>(Scalar(1)));
-    Packet mask = pcmp_eq(pset1<Packet>(value), p);
-    Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
-    this->res = value;
-    this->row = Derived::IsRowMajor ? i : i + max_idx;
-    this->col = Derived::IsRowMajor ? j + max_idx : j;
-  }
-};
-
-// Suppress NaN. The only case in which we return NaN is if the matrix is all NaN,
-// in which case, row=0, col=0 is returned for the location.
-template <typename Derived, bool is_min>
-struct minmax_coeff_visitor<Derived, is_min, PropagateNumbers, false> : coeff_visitor<Derived> {
-  typedef typename Derived::Scalar Scalar;
-  using Packet = typename packet_traits<Scalar>::type;
-  using Comparator = minmax_compare<Scalar, PropagateNumbers, is_min>;
-
-  EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index i, Index j) {
-    if ((!(numext::isnan)(value) && (numext::isnan)(this->res)) || Comparator::compare(value, this->res)) {
-      this->res = value;
-      this->row = i;
-      this->col = j;
-    }
-  }
-  EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index i, Index j) {
-    const Index PacketSize = packet_traits<Scalar>::size;
-    Scalar value = Comparator::predux(p);
-    if ((!(numext::isnan)(value) && (numext::isnan)(this->res)) || Comparator::compare(value, this->res)) {
-      const Packet range = preverse(plset<Packet>(Scalar(1)));
-      /* mask will be zero for NaNs, so they will be ignored. */
-      Packet mask = pcmp_eq(pset1<Packet>(value), p);
-      Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
-      this->res = value;
-      this->row = Derived::IsRowMajor ? i : i + max_idx;
-      this->col = Derived::IsRowMajor ? j + max_idx : j;
-    }
-  }
-  EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index i, Index j) {
-    const Index PacketSize = packet_traits<Scalar>::size;
-    Scalar value = Comparator::predux(p);
-    if ((numext::isnan)(value)) {
-      this->res = value;
-      this->row = 0;
-      this->col = 0;
-      return;
-    }
-    const Packet range = preverse(plset<Packet>(Scalar(1)));
-    /* mask will be zero for NaNs, so they will be ignored. */
-    Packet mask = pcmp_eq(pset1<Packet>(value), p);
-    Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
-    this->res = value;
-    this->row = Derived::IsRowMajor ? i : i + max_idx;
-    this->col = Derived::IsRowMajor ? j + max_idx : j;
-  }
-};
-
-// Propagate NaNs. If the matrix contains NaN, the location of the first NaN
-// will be returned in row and col.
-template <typename Derived, bool is_min, int NaNPropagation>
-struct minmax_coeff_visitor<Derived, is_min, NaNPropagation, false> : coeff_visitor<Derived> {
-  typedef typename Derived::Scalar Scalar;
-  using Packet = typename packet_traits<Scalar>::type;
-  using Comparator = minmax_compare<Scalar, PropagateNaN, is_min>;
-
-  EIGEN_DEVICE_FUNC inline void operator()(const Scalar& value, Index i, Index j) {
-    const bool value_is_nan = (numext::isnan)(value);
-    if ((value_is_nan && !(numext::isnan)(this->res)) || Comparator::compare(value, this->res)) {
-      this->res = value;
-      this->row = i;
-      this->col = j;
-    }
-  }
-  EIGEN_DEVICE_FUNC inline void packet(const Packet& p, Index i, Index j) {
-    const Index PacketSize = packet_traits<Scalar>::size;
-    Scalar value = Comparator::predux(p);
-    const bool value_is_nan = (numext::isnan)(value);
-    if ((value_is_nan && !(numext::isnan)(this->res)) || Comparator::compare(value, this->res)) {
-      const Packet range = preverse(plset<Packet>(Scalar(1)));
-      // If the value is NaN, pick the first position of a NaN, otherwise pick the first extremal value.
-      Packet mask = value_is_nan ? pnot(pcmp_eq(p, p)) : pcmp_eq(pset1<Packet>(value), p);
-      Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
-      this->res = value;
-      this->row = Derived::IsRowMajor ? i : i + max_idx;
-      this->col = Derived::IsRowMajor ? j + max_idx : j;
-    }
-  }
-  EIGEN_DEVICE_FUNC inline void initpacket(const Packet& p, Index i, Index j) {
-    const Index PacketSize = packet_traits<Scalar>::size;
-    Scalar value = Comparator::predux(p);
-    const bool value_is_nan = (numext::isnan)(value);
-    const Packet range = preverse(plset<Packet>(Scalar(1)));
-    // If the value is NaN, pick the first position of a NaN, otherwise pick the first extremal value.
-    Packet mask = value_is_nan ? pnot(pcmp_eq(p, p)) : pcmp_eq(pset1<Packet>(value), p);
-    Index max_idx = PacketSize - static_cast<Index>(predux_max(pand(range, mask)));
-    this->res = value;
-    this->row = Derived::IsRowMajor ? i : i + max_idx;
-    this->col = Derived::IsRowMajor ? j + max_idx : j;
-  }
-};
-
-template <typename Derived, bool is_min, int NaNPropagation>
-struct functor_traits<minmax_coeff_visitor<Derived, is_min, NaNPropagation>> {
-  using Scalar = typename Derived::Scalar;
-  enum { Cost = NumTraits<Scalar>::AddCost, LinearAccess = false, PacketAccess = packet_traits<Scalar>::HasCmp };
-};
-
 template <typename Scalar>
 struct all_visitor {
   using result_type = bool;
@@ -643,100 +476,6 @@ struct all_finite_impl<Derived, false> {
 
 }  // end namespace internal
 
-/** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
-  * \returns the minimum of all coefficients of *this and puts in *row and *col its location.
-  *
-  * In case \c *this contains NaN, NaNPropagation determines the behavior:
-  *   NaNPropagation == PropagateFast : undefined
-  *   NaNPropagation == PropagateNaN : result is NaN
-  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
-  * \warning the matrix must be not empty, otherwise an assertion is triggered.
-  *
-  * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
-  */
-template <typename Derived>
-template <int NaNPropagation, typename IndexType>
-EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff(IndexType* rowId,
-                                                                                          IndexType* colId) const {
-  eigen_assert(this->rows() > 0 && this->cols() > 0 && "you are using an empty matrix");
-
-  internal::minmax_coeff_visitor<Derived, true, NaNPropagation> minVisitor;
-  this->visit(minVisitor);
-  *rowId = minVisitor.row;
-  if (colId) *colId = minVisitor.col;
-  return minVisitor.res;
-}
-
-/** \returns the minimum of all coefficients of *this and puts in *index its location.
-  *
-  * In case \c *this contains NaN, NaNPropagation determines the behavior:
-  *   NaNPropagation == PropagateFast : undefined
-  *   NaNPropagation == PropagateNaN : result is NaN
-  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
-  * \warning the matrix must be not empty, otherwise an assertion is triggered.
-  *
-  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(),
-  *     DenseBase::minCoeff()
-  */
-template <typename Derived>
-template <int NaNPropagation, typename IndexType>
-EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff(IndexType* index) const {
-  eigen_assert(this->rows() > 0 && this->cols() > 0 && "you are using an empty matrix");
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-
-  internal::minmax_coeff_visitor<Derived, true, NaNPropagation> minVisitor;
-  this->visit(minVisitor);
-  *index = IndexType((RowsAtCompileTime == 1) ? minVisitor.col : minVisitor.row);
-  return minVisitor.res;
-}
-
-/** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
-  * \returns the maximum of all coefficients of *this and puts in *row and *col its location.
-  *
-  * In case \c *this contains NaN, NaNPropagation determines the behavior:
-  *   NaNPropagation == PropagateFast : undefined
-  *   NaNPropagation == PropagateNaN : result is NaN
-  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
-  * \warning the matrix must be not empty, otherwise an assertion is triggered.
-  *
-  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
-  */
-template <typename Derived>
-template <int NaNPropagation, typename IndexType>
-EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff(IndexType* rowPtr,
-                                                                                          IndexType* colPtr) const {
-  eigen_assert(this->rows() > 0 && this->cols() > 0 && "you are using an empty matrix");
-
-  internal::minmax_coeff_visitor<Derived, false, NaNPropagation> maxVisitor;
-  this->visit(maxVisitor);
-  *rowPtr = maxVisitor.row;
-  if (colPtr) *colPtr = maxVisitor.col;
-  return maxVisitor.res;
-}
-
-/** \returns the maximum of all coefficients of *this and puts in *index its location.
-  *
-  * In case \c *this contains NaN, NaNPropagation determines the behavior:
-  *   NaNPropagation == PropagateFast : undefined
-  *   NaNPropagation == PropagateNaN : result is NaN
-  *   NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN
-  * \warning the matrix must be not empty, otherwise an assertion is triggered.
-  *
-  * \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(),
-  *     DenseBase::maxCoeff()
-  */
-template <typename Derived>
-template <int NaNPropagation, typename IndexType>
-EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff(IndexType* index) const {
-  eigen_assert(this->rows() > 0 && this->cols() > 0 && "you are using an empty matrix");
-
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  internal::minmax_coeff_visitor<Derived, false, NaNPropagation> maxVisitor;
-  this->visit(maxVisitor);
-  *index = (RowsAtCompileTime == 1) ? maxVisitor.col : maxVisitor.row;
-  return maxVisitor.res;
-}
-
 /** \returns true if all coefficients are true
   *
   * Example: \include MatrixBase_all.cpp
@ -654,25 +654,6 @@ template <>
EIGEN_STRONG_INLINE uint64_t pfirst<Packet4ul>(const Packet4ul& a) {
  return _mm_extract_epi64_0(_mm256_castsi256_si128(a));
}
template <>
EIGEN_STRONG_INLINE int64_t predux<Packet4l>(const Packet4l& a) {
  __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
  return _mm_extract_epi64_0(r) + _mm_extract_epi64_1(r);
}
template <>
EIGEN_STRONG_INLINE uint64_t predux<Packet4ul>(const Packet4ul& a) {
  __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
  return numext::bit_cast<uint64_t>(_mm_extract_epi64_0(r) + _mm_extract_epi64_1(r));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4l& a) {
  return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0;
}
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4ul& a) {
  return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0;
}

#define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M)
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4l, 4>& kernel) {
@ -1955,23 +1936,6 @@ EIGEN_STRONG_INLINE Packet4d pldexp_fast<Packet4d>(const Packet4d& a, const Pack
  return pmul(a, c);  // a * 2^e
}

template <>
EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a) {
  return predux(Packet4f(_mm_add_ps(_mm256_castps256_ps128(a), _mm256_extractf128_ps(a, 1))));
}
template <>
EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a) {
  return predux(Packet2d(_mm_add_pd(_mm256_castpd256_pd128(a), _mm256_extractf128_pd(a, 1))));
}
template <>
EIGEN_STRONG_INLINE int predux<Packet8i>(const Packet8i& a) {
  return predux(Packet4i(_mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1))));
}
template <>
EIGEN_STRONG_INLINE uint32_t predux<Packet8ui>(const Packet8ui& a) {
  return predux(Packet4ui(_mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1))));
}

template <>
EIGEN_STRONG_INLINE Packet4f predux_half_dowto4<Packet8f>(const Packet8f& a) {
  return _mm_add_ps(_mm256_castps256_ps128(a), _mm256_extractf128_ps(a, 1));
@ -1985,82 +1949,6 @@ EIGEN_STRONG_INLINE Packet4ui predux_half_dowto4<Packet8ui>(const Packet8ui& a)
  return _mm_add_epi32(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1));
}

template <>
EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a) {
  Packet8f tmp;
  tmp = _mm256_mul_ps(a, _mm256_permute2f128_ps(a, a, 1));
  tmp = _mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp, tmp, _MM_SHUFFLE(1, 0, 3, 2)));
  return pfirst(_mm256_mul_ps(tmp, _mm256_shuffle_ps(tmp, tmp, 1)));
}
template <>
EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a) {
  Packet4d tmp;
  tmp = _mm256_mul_pd(a, _mm256_permute2f128_pd(a, a, 1));
  return pfirst(_mm256_mul_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
}

template <>
EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a) {
  Packet8f tmp = _mm256_min_ps(a, _mm256_permute2f128_ps(a, a, 1));
  tmp = _mm256_min_ps(tmp, _mm256_shuffle_ps(tmp, tmp, _MM_SHUFFLE(1, 0, 3, 2)));
  return pfirst(_mm256_min_ps(tmp, _mm256_shuffle_ps(tmp, tmp, 1)));
}
template <>
EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a) {
  Packet4d tmp = _mm256_min_pd(a, _mm256_permute2f128_pd(a, a, 1));
  return pfirst(_mm256_min_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
}

template <>
EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a) {
  Packet8f tmp = _mm256_max_ps(a, _mm256_permute2f128_ps(a, a, 1));
  tmp = _mm256_max_ps(tmp, _mm256_shuffle_ps(tmp, tmp, _MM_SHUFFLE(1, 0, 3, 2)));
  return pfirst(_mm256_max_ps(tmp, _mm256_shuffle_ps(tmp, tmp, 1)));
}

template <>
EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a) {
  Packet4d tmp = _mm256_max_pd(a, _mm256_permute2f128_pd(a, a, 1));
  return pfirst(_mm256_max_pd(tmp, _mm256_shuffle_pd(tmp, tmp, 1)));
}

// not needed yet
// template<> EIGEN_STRONG_INLINE bool predux_all(const Packet8f& x)
// {
//   return _mm256_movemask_ps(x)==0xFF;
// }

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x) {
  return _mm256_movemask_ps(x) != 0;
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4d& x) {
  return _mm256_movemask_pd(x) != 0;
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8i& x) {
  return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0;
}
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8ui& x) {
  return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0;
}
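// Editor's note (illustrative, not part of the patch): _mm256_movemask_ps packs the sign bit of
// each 32-bit lane into the low 8 bits of an int, so a nonzero mask means at least one lane has
// its high bit set -- exactly "some lane is true" for Eigen's all-zeros/all-ones boolean packets:
//   __m256 mask = _mm256_cmp_ps(x, y, _CMP_LT_OQ);  // each lane is 0x00000000 or 0xFFFFFFFF
//   bool any_lt = _mm256_movemask_ps(mask) != 0;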

#ifndef EIGEN_VECTORIZE_AVX512FP16
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8h& x) {
  return _mm_movemask_epi8(x) != 0;
}
#endif  // EIGEN_VECTORIZE_AVX512FP16

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8bf& x) {
  return _mm_movemask_epi8(x) != 0;
}

EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8f, 8>& kernel) {
  __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
  __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
@ -2361,24 +2249,64 @@ EIGEN_STRONG_INLINE Packet8h ptrunc<Packet8h>(const Packet8h& a) {
  return float2half(ptrunc<Packet8f>(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE Packet8h pisinf<Packet8h>(const Packet8h& a) {
  constexpr uint16_t kInf = ((1 << 5) - 1) << 10;
  constexpr uint16_t kAbsMask = (1 << 15) - 1;
  return _mm_cmpeq_epi16(_mm_and_si128(a.m_val, _mm_set1_epi16(kAbsMask)), _mm_set1_epi16(kInf));
}

template <>
EIGEN_STRONG_INLINE Packet8h pisnan<Packet8h>(const Packet8h& a) {
  constexpr uint16_t kInf = ((1 << 5) - 1) << 10;
  constexpr uint16_t kAbsMask = (1 << 15) - 1;
  return _mm_cmpgt_epi16(_mm_and_si128(a.m_val, _mm_set1_epi16(kAbsMask)), _mm_set1_epi16(kInf));
}

// convert the sign-magnitude representation to two's complement
EIGEN_STRONG_INLINE __m128i pmaptosigned(const __m128i& a) {
  constexpr uint16_t kAbsMask = (1 << 15) - 1;
  // if 'a' has the sign bit set, clear the sign bit and negate the result as if it were an integer
  return _mm_sign_epi16(_mm_and_si128(a, _mm_set1_epi16(kAbsMask)), a);
}
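// Editor's note: a scalar sketch of the same mapping, for illustration only. Per 16-bit lane,
// _mm_sign_epi16(mag, a) negates mag wherever the corresponding lane of a is negative (i.e.
// wherever the half's sign bit is set), so the transform above is equivalent to:
//   int16_t map_to_signed(uint16_t bits) {  // hypothetical helper
//     uint16_t mag = bits & 0x7FFF;         // clear the sign bit
//     return (bits & 0x8000) ? int16_t(-int32_t(mag)) : int16_t(mag);
//   }
// e.g. 0x3C00 (+1.0h) -> 15360 and 0xBC00 (-1.0h) -> -15360, so integer order matches IEEE order.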

// return true if both `a` and `b` are not NaN
EIGEN_STRONG_INLINE Packet8h pisordered(const Packet8h& a, const Packet8h& b) {
  constexpr uint16_t kInf = ((1 << 5) - 1) << 10;
  constexpr uint16_t kAbsMask = (1 << 15) - 1;
  __m128i abs_a = _mm_and_si128(a.m_val, _mm_set1_epi16(kAbsMask));
  __m128i abs_b = _mm_and_si128(b.m_val, _mm_set1_epi16(kAbsMask));
  // check if both `abs_a <= kInf` and `abs_b <= kInf` by checking if max(abs_a, abs_b) <= kInf
  // SSE has no `less-or-equal` instruction for integers, but comparing against kInf + 1 accomplishes the same goal
  return _mm_cmplt_epi16(_mm_max_epu16(abs_a, abs_b), _mm_set1_epi16(kInf + 1));
}

template <>
EIGEN_STRONG_INLINE Packet8h pcmp_eq(const Packet8h& a, const Packet8h& b) {
  return Pack16To8(pcmp_eq(half2float(a), half2float(b)));
  __m128i isOrdered = pisordered(a, b);
  __m128i isEqual = _mm_cmpeq_epi16(pmaptosigned(a.m_val), pmaptosigned(b.m_val));
  return _mm_and_si128(isOrdered, isEqual);
}

template <>
EIGEN_STRONG_INLINE Packet8h pcmp_le(const Packet8h& a, const Packet8h& b) {
  return Pack16To8(pcmp_le(half2float(a), half2float(b)));
  __m128i isOrdered = pisordered(a, b);
  __m128i isGreater = _mm_cmpgt_epi16(pmaptosigned(a.m_val), pmaptosigned(b.m_val));
  return _mm_andnot_si128(isGreater, isOrdered);
}

template <>
EIGEN_STRONG_INLINE Packet8h pcmp_lt(const Packet8h& a, const Packet8h& b) {
  return Pack16To8(pcmp_lt(half2float(a), half2float(b)));
  __m128i isOrdered = pisordered(a, b);
  __m128i isLess = _mm_cmplt_epi16(pmaptosigned(a.m_val), pmaptosigned(b.m_val));
  return _mm_and_si128(isOrdered, isLess);
}

template <>
EIGEN_STRONG_INLINE Packet8h pcmp_lt_or_nan(const Packet8h& a, const Packet8h& b) {
  return Pack16To8(pcmp_lt_or_nan(half2float(a), half2float(b)));
  __m128i isUnordered = por(pisnan(a), pisnan(b));
  __m128i isLess = _mm_cmplt_epi16(pmaptosigned(a.m_val), pmaptosigned(b.m_val));
  return _mm_or_si128(isUnordered, isLess);
}

template <>
@ -2473,34 +2401,6 @@ EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet8h>(Eigen::half* to, const
  to[stride * 7] = aux[7];
}

template <>
EIGEN_STRONG_INLINE Eigen::half predux<Packet8h>(const Packet8h& a) {
  Packet8f af = half2float(a);
  float reduced = predux<Packet8f>(af);
  return Eigen::half(reduced);
}

template <>
EIGEN_STRONG_INLINE Eigen::half predux_max<Packet8h>(const Packet8h& a) {
  Packet8f af = half2float(a);
  float reduced = predux_max<Packet8f>(af);
  return Eigen::half(reduced);
}

template <>
EIGEN_STRONG_INLINE Eigen::half predux_min<Packet8h>(const Packet8h& a) {
  Packet8f af = half2float(a);
  float reduced = predux_min<Packet8f>(af);
  return Eigen::half(reduced);
}

template <>
EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet8h>(const Packet8h& a) {
  Packet8f af = half2float(a);
  float reduced = predux_mul<Packet8f>(af);
  return Eigen::half(reduced);
}

template <>
EIGEN_STRONG_INLINE Packet8h preverse(const Packet8h& a) {
  __m128i m = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
@ -2859,26 +2759,6 @@ EIGEN_STRONG_INLINE void pscatter<bfloat16, Packet8bf>(bfloat16* to, const Packe
  to[stride * 7] = aux[7];
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux<Packet8bf>(const Packet8bf& a) {
  return static_cast<bfloat16>(predux<Packet8f>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_max<Packet8bf>(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_max<Packet8f>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_min<Packet8bf>(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_min<Packet8f>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_mul<Packet8bf>(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_mul<Packet8f>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE Packet8bf preverse(const Packet8bf& a) {
  __m128i m = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
353
Eigen/src/Core/arch/AVX/Reductions.h
Normal file
@ -0,0 +1,353 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2025 Charlie Schlosser <cs.schlosser@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUCTIONS_AVX_H
#define EIGEN_REDUCTIONS_AVX_H

// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8i -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE int predux(const Packet8i& a) {
  Packet4i lo = _mm256_castsi256_si128(a);
  Packet4i hi = _mm256_extractf128_si256(a, 1);
  return predux(padd(lo, hi));
}

template <>
EIGEN_STRONG_INLINE int predux_mul(const Packet8i& a) {
  Packet4i lo = _mm256_castsi256_si128(a);
  Packet4i hi = _mm256_extractf128_si256(a, 1);
  return predux_mul(pmul(lo, hi));
}

template <>
EIGEN_STRONG_INLINE int predux_min(const Packet8i& a) {
  Packet4i lo = _mm256_castsi256_si128(a);
  Packet4i hi = _mm256_extractf128_si256(a, 1);
  return predux_min(pmin(lo, hi));
}

template <>
EIGEN_STRONG_INLINE int predux_max(const Packet8i& a) {
  Packet4i lo = _mm256_castsi256_si128(a);
  Packet4i hi = _mm256_extractf128_si256(a, 1);
  return predux_max(pmax(lo, hi));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8i& a) {
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_movemask_epi8(a) != 0x0;
#else
  return _mm256_movemask_ps(_mm256_castsi256_ps(a)) != 0x0;
#endif
}
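// Editor's note: the AVX2/AVX split above exists because _mm256_movemask_epi8 requires AVX2;
// plain AVX has no 256-bit integer movemask, so the register is reinterpreted as floats and the
// AVX-level _mm256_movemask_ps is used instead. Both branches inspect only sign bits, and every
// 32-bit lane of a boolean packet is all-zeros or all-ones, so they agree; e.g. for
//   a = {0, 0, -1, 0, 0, 0, 0, 0}  ->  epi8 mask 0x00000F00 != 0, ps mask 0b00000100 != 0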

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8ui -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE uint32_t predux(const Packet8ui& a) {
  Packet4ui lo = _mm256_castsi256_si128(a);
  Packet4ui hi = _mm256_extractf128_si256(a, 1);
  return predux(padd(lo, hi));
}

template <>
EIGEN_STRONG_INLINE uint32_t predux_mul(const Packet8ui& a) {
  Packet4ui lo = _mm256_castsi256_si128(a);
  Packet4ui hi = _mm256_extractf128_si256(a, 1);
  return predux_mul(pmul(lo, hi));
}

template <>
EIGEN_STRONG_INLINE uint32_t predux_min(const Packet8ui& a) {
  Packet4ui lo = _mm256_castsi256_si128(a);
  Packet4ui hi = _mm256_extractf128_si256(a, 1);
  return predux_min(pmin(lo, hi));
}

template <>
EIGEN_STRONG_INLINE uint32_t predux_max(const Packet8ui& a) {
  Packet4ui lo = _mm256_castsi256_si128(a);
  Packet4ui hi = _mm256_extractf128_si256(a, 1);
  return predux_max(pmax(lo, hi));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8ui& a) {
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_movemask_epi8(a) != 0x0;
#else
  return _mm256_movemask_ps(_mm256_castsi256_ps(a)) != 0x0;
#endif
}

#ifdef EIGEN_VECTORIZE_AVX2

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4l -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE int64_t predux(const Packet4l& a) {
  Packet2l lo = _mm256_castsi256_si128(a);
  Packet2l hi = _mm256_extractf128_si256(a, 1);
  return predux(padd(lo, hi));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4l& a) {
  return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0x0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4ul -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE uint64_t predux(const Packet4ul& a) {
  return static_cast<uint64_t>(predux(Packet4l(a)));
}
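// Editor's note: reusing the signed reduction for Packet4ul is sound because wraparound addition
// produces the same bit pattern regardless of whether the 64-bit lanes are read as signed or
// unsigned; only the final interpretation differs, which the static_cast supplies, e.g.
//   0xFFFFFFFFFFFFFFFF + 2 == 1 (mod 2^64), whether the operands are read as {-1, 2} or as
//   {2^64 - 1, 2}.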

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4ul& a) {
  return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0x0;
}

#endif

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8f -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE float predux(const Packet8f& a) {
  Packet4f lo = _mm256_castps256_ps128(a);
  Packet4f hi = _mm256_extractf128_ps(a, 1);
  return predux(padd(lo, hi));
}

template <>
EIGEN_STRONG_INLINE float predux_mul(const Packet8f& a) {
  Packet4f lo = _mm256_castps256_ps128(a);
  Packet4f hi = _mm256_extractf128_ps(a, 1);
  return predux_mul(pmul(lo, hi));
}

template <>
EIGEN_STRONG_INLINE float predux_min(const Packet8f& a) {
  Packet4f lo = _mm256_castps256_ps128(a);
  Packet4f hi = _mm256_extractf128_ps(a, 1);
  return predux_min(pmin(lo, hi));
}

template <>
EIGEN_STRONG_INLINE float predux_min<PropagateNumbers>(const Packet8f& a) {
  Packet4f lo = _mm256_castps256_ps128(a);
  Packet4f hi = _mm256_extractf128_ps(a, 1);
  return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lo, hi));
}

template <>
EIGEN_STRONG_INLINE float predux_min<PropagateNaN>(const Packet8f& a) {
  Packet4f lo = _mm256_castps256_ps128(a);
  Packet4f hi = _mm256_extractf128_ps(a, 1);
  return predux_min<PropagateNaN>(pmin<PropagateNaN>(lo, hi));
}

template <>
EIGEN_STRONG_INLINE float predux_max(const Packet8f& a) {
  Packet4f lo = _mm256_castps256_ps128(a);
  Packet4f hi = _mm256_extractf128_ps(a, 1);
  return predux_max(pmax(lo, hi));
}

template <>
EIGEN_STRONG_INLINE float predux_max<PropagateNumbers>(const Packet8f& a) {
  Packet4f lo = _mm256_castps256_ps128(a);
  Packet4f hi = _mm256_extractf128_ps(a, 1);
  return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lo, hi));
}

template <>
EIGEN_STRONG_INLINE float predux_max<PropagateNaN>(const Packet8f& a) {
  Packet4f lo = _mm256_castps256_ps128(a);
  Packet4f hi = _mm256_extractf128_ps(a, 1);
  return predux_max<PropagateNaN>(pmax<PropagateNaN>(lo, hi));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8f& a) {
  return _mm256_movemask_ps(a) != 0x0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4d -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE double predux(const Packet4d& a) {
  Packet2d lo = _mm256_castpd256_pd128(a);
  Packet2d hi = _mm256_extractf128_pd(a, 1);
  return predux(padd(lo, hi));
}

template <>
EIGEN_STRONG_INLINE double predux_mul(const Packet4d& a) {
  Packet2d lo = _mm256_castpd256_pd128(a);
  Packet2d hi = _mm256_extractf128_pd(a, 1);
  return predux_mul(pmul(lo, hi));
}

template <>
EIGEN_STRONG_INLINE double predux_min(const Packet4d& a) {
  Packet2d lo = _mm256_castpd256_pd128(a);
  Packet2d hi = _mm256_extractf128_pd(a, 1);
  return predux_min(pmin(lo, hi));
}

template <>
EIGEN_STRONG_INLINE double predux_min<PropagateNumbers>(const Packet4d& a) {
  Packet2d lo = _mm256_castpd256_pd128(a);
  Packet2d hi = _mm256_extractf128_pd(a, 1);
  return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lo, hi));
}

template <>
EIGEN_STRONG_INLINE double predux_min<PropagateNaN>(const Packet4d& a) {
  Packet2d lo = _mm256_castpd256_pd128(a);
  Packet2d hi = _mm256_extractf128_pd(a, 1);
  return predux_min<PropagateNaN>(pmin<PropagateNaN>(lo, hi));
}

template <>
EIGEN_STRONG_INLINE double predux_max(const Packet4d& a) {
  Packet2d lo = _mm256_castpd256_pd128(a);
  Packet2d hi = _mm256_extractf128_pd(a, 1);
  return predux_max(pmax(lo, hi));
}

template <>
EIGEN_STRONG_INLINE double predux_max<PropagateNumbers>(const Packet4d& a) {
  Packet2d lo = _mm256_castpd256_pd128(a);
  Packet2d hi = _mm256_extractf128_pd(a, 1);
  return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lo, hi));
}

template <>
EIGEN_STRONG_INLINE double predux_max<PropagateNaN>(const Packet4d& a) {
  Packet2d lo = _mm256_castpd256_pd128(a);
  Packet2d hi = _mm256_extractf128_pd(a, 1);
  return predux_max<PropagateNaN>(pmax<PropagateNaN>(lo, hi));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4d& a) {
  return _mm256_movemask_pd(a) != 0x0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8h -- -- -- -- -- -- -- -- -- -- -- -- */
#ifndef EIGEN_VECTORIZE_AVX512FP16

template <>
EIGEN_STRONG_INLINE half predux(const Packet8h& a) {
  return static_cast<half>(predux(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE half predux_mul(const Packet8h& a) {
  return static_cast<half>(predux_mul(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE half predux_min(const Packet8h& a) {
  return static_cast<half>(predux_min(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE half predux_min<PropagateNumbers>(const Packet8h& a) {
  return static_cast<half>(predux_min<PropagateNumbers>(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE half predux_min<PropagateNaN>(const Packet8h& a) {
  return static_cast<half>(predux_min<PropagateNaN>(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE half predux_max(const Packet8h& a) {
  return static_cast<half>(predux_max(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE half predux_max<PropagateNumbers>(const Packet8h& a) {
  return static_cast<half>(predux_max<PropagateNumbers>(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE half predux_max<PropagateNaN>(const Packet8h& a) {
  return static_cast<half>(predux_max<PropagateNaN>(half2float(a)));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8h& a) {
  return _mm_movemask_epi8(a) != 0;
}
#endif  // EIGEN_VECTORIZE_AVX512FP16

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8bf -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE bfloat16 predux(const Packet8bf& a) {
  return static_cast<bfloat16>(predux<Packet8f>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_mul(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_mul<Packet8f>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_min(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_min(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNumbers>(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_min<PropagateNumbers>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNaN>(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_min<PropagateNaN>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_max(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_max<Packet8f>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNumbers>(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_max<PropagateNumbers>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNaN>(const Packet8bf& a) {
  return static_cast<bfloat16>(predux_max<PropagateNaN>(Bf16ToF32(a)));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8bf& a) {
  return _mm_movemask_epi8(a) != 0;
}

} // end namespace internal
} // end namespace Eigen

#endif // EIGEN_REDUCTIONS_AVX_H
@ -1494,40 +1494,6 @@ EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, const Packet8d&
  OUTPUT = _mm512_inserti32x4(OUTPUT, _mm256_extractf128_si256(INPUTB, 1), 3);
#endif

template <>
EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
  __m256 lane0 = _mm512_extractf32x8_ps(a, 0);
  __m256 lane1 = _mm512_extractf32x8_ps(a, 1);
  Packet8f x = _mm256_add_ps(lane0, lane1);
  return predux<Packet8f>(x);
#else
  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);
  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);
  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);
  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);
  __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3));
  return predux<Packet4f>(sum);
#endif
}
template <>
EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);
  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);
  __m256d sum = _mm256_add_pd(lane0, lane1);
  return predux<Packet4d>(sum);
}

template <>
EIGEN_STRONG_INLINE int64_t predux<Packet8l>(const Packet8l& a) {
  return _mm512_reduce_add_epi64(a);
}

template <>
EIGEN_STRONG_INLINE int predux<Packet16i>(const Packet16i& a) {
  return _mm512_reduce_add_epi32(a);
}

template <>
EIGEN_STRONG_INLINE Packet8f predux_half_dowto4<Packet16f>(const Packet16f& a) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
@ -1574,136 +1540,6 @@ EIGEN_STRONG_INLINE Packet4l predux_half_dowto4<Packet8l>(const Packet8l& a) {
  return _mm256_add_epi64(lane0, lane1);
}

template <>
EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
// #ifdef EIGEN_VECTORIZE_AVX512DQ
#if 0
  Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
  Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
  Packet8f res = pmul(lane0, lane1);
  res = pmul(res, _mm256_permute2f128_ps(res, res, 1));
  res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
#else
  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);
  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);
  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);
  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);
  __m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3));
  res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
#endif
}
template <>
EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);
  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);
  __m256d res = pmul(lane0, lane1);
  res = pmul(res, _mm256_permute2f128_pd(res, res, 1));
  return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1)));
}
template <>
EIGEN_STRONG_INLINE int predux_mul<Packet16i>(const Packet16i& a) {
  return _mm512_reduce_mul_epi32(a);
}

#if EIGEN_COMP_MSVC
// MSVC's _mm512_reduce_mul_epi64 is borked, at least up to and including 1939.
//   alignas(64) int64_t data[] = { 1,1,-1,-1,1,-1,-1,-1 };
//   int64_t out = _mm512_reduce_mul_epi64(_mm512_load_epi64(data));
// produces garbage: 4294967295. It seems to happen whenever the output is supposed to be negative.
// Fall back to a manual approach:
template <>
EIGEN_STRONG_INLINE int64_t predux_mul<Packet8l>(const Packet8l& a) {
  Packet4l lane0 = _mm512_extracti64x4_epi64(a, 0);
  Packet4l lane1 = _mm512_extracti64x4_epi64(a, 1);
  Packet4l res = pmul(lane0, lane1);
  res = pmul(res, Packet4l(_mm256_permute2x128_si256(res, res, 1)));
  res = pmul(res, Packet4l(_mm256_shuffle_epi32(res, 0xE)));
  return pfirst(res);
}
#else
template <>
EIGEN_STRONG_INLINE int64_t predux_mul<Packet8l>(const Packet8l& a) {
  return _mm512_reduce_mul_epi64(a);
}
#endif
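// Editor's note: the MSVC fallback above uses the standard halving pattern -- 512 -> 256 via
// _mm512_extracti64x4_epi64, then swap the 256-bit register's two 128-bit halves
// (_mm256_permute2x128_si256), then swap 64-bit elements (_mm256_shuffle_epi32 with 0xE, i.e.
// _MM_SHUFFLE(0, 0, 3, 2)), multiplying after each step so that lane 0 ends up holding the
// product of all eight elements, which pfirst extracts.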

template <>
EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) {
  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);
  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);
  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);
  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);
  __m128 res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3));
  res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
}
template <>
EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) {
  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);
  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);
  __m256d res = _mm256_min_pd(lane0, lane1);
  res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1));
  return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
template <>
EIGEN_STRONG_INLINE int predux_min<Packet16i>(const Packet16i& a) {
  return _mm512_reduce_min_epi32(a);
}
template <>
EIGEN_STRONG_INLINE int64_t predux_min<Packet8l>(const Packet8l& a) {
  return _mm512_reduce_min_epi64(a);
}

template <>
EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) {
  __m128 lane0 = _mm512_extractf32x4_ps(a, 0);
  __m128 lane1 = _mm512_extractf32x4_ps(a, 1);
  __m128 lane2 = _mm512_extractf32x4_ps(a, 2);
  __m128 lane3 = _mm512_extractf32x4_ps(a, 3);
  __m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3));
  res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1))));
}

template <>
EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) {
  __m256d lane0 = _mm512_extractf64x4_pd(a, 0);
  __m256d lane1 = _mm512_extractf64x4_pd(a, 1);
  __m256d res = _mm256_max_pd(lane0, lane1);
  res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1));
  return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1)));
}
template <>
EIGEN_STRONG_INLINE int predux_max<Packet16i>(const Packet16i& a) {
  return _mm512_reduce_max_epi32(a);
}
template <>
EIGEN_STRONG_INLINE int64_t predux_max<Packet8l>(const Packet8l& a) {
  return _mm512_reduce_max_epi64(a);
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16f& a) {
  return _mm512_reduce_or_epi32(_mm512_castps_si512(a)) != 0;
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16i& a) {
  return _mm512_reduce_or_epi32(a) != 0;
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8d& a) {
  return _mm512_reduce_or_epi64(_mm512_castpd_si512(a)) != 0;
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8l& a) {
  return _mm512_reduce_or_epi64(a) != 0;
}

#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \
  EIGEN_INSERT_8f_INTO_16f(OUTPUT[INDEX], INPUT[INDEX], INPUT[INDEX + STRIDE]);
@ -2466,12 +2302,6 @@ EIGEN_STRONG_INLINE Packet16h pnmsub<Packet16h>(const Packet16h& a, const Packet
  return float2half(pnmsub(half2float(a), half2float(b), half2float(c)));
}

template <>
EIGEN_STRONG_INLINE half predux<Packet16h>(const Packet16h& from) {
  Packet16f from_float = half2float(from);
  return half(predux(from_float));
}

template <>
EIGEN_STRONG_INLINE Packet8h predux_half_dowto4<Packet16h>(const Packet16h& a) {
  Packet8h lane0 = _mm256_extractf128_si256(a, 0);
@ -2479,26 +2309,6 @@ EIGEN_STRONG_INLINE Packet8h predux_half_dowto4<Packet16h>(const Packet16h& a) {
  return padd<Packet8h>(lane0, lane1);
}

template <>
EIGEN_STRONG_INLINE Eigen::half predux_max<Packet16h>(const Packet16h& a) {
  Packet16f af = half2float(a);
  float reduced = predux_max<Packet16f>(af);
  return Eigen::half(reduced);
}

template <>
EIGEN_STRONG_INLINE Eigen::half predux_min<Packet16h>(const Packet16h& a) {
  Packet16f af = half2float(a);
  float reduced = predux_min<Packet16f>(af);
  return Eigen::half(reduced);
}

template <>
EIGEN_STRONG_INLINE half predux_mul<Packet16h>(const Packet16h& from) {
  Packet16f from_float = half2float(from);
  return half(predux_mul(from_float));
}

template <>
EIGEN_STRONG_INLINE Packet16h preverse(const Packet16h& a) {
  __m128i m = _mm_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
@ -3005,26 +2815,6 @@ EIGEN_STRONG_INLINE Packet8bf predux_half_dowto4<Packet16bf>(const Packet16bf& a
  return padd<Packet8bf>(lane0, lane1);
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux<Packet16bf>(const Packet16bf& p) {
  return static_cast<bfloat16>(predux<Packet16f>(Bf16ToF32(p)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_mul<Packet16bf>(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_mul<Packet16f>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_min<Packet16bf>(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_min<Packet16f>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_max<Packet16bf>(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_max<Packet16f>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE Packet16bf preverse(const Packet16bf& a) {
  __m256i m = _mm256_setr_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9, 6, 7,
297
Eigen/src/Core/arch/AVX512/Reductions.h
Normal file
@ -0,0 +1,297 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2025 Charlie Schlosser <cs.schlosser@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUCTIONS_AVX512_H
#define EIGEN_REDUCTIONS_AVX512_H

// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet16i -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE int predux(const Packet16i& a) {
  return _mm512_reduce_add_epi32(a);
}

template <>
EIGEN_STRONG_INLINE int predux_mul(const Packet16i& a) {
  return _mm512_reduce_mul_epi32(a);
}

template <>
EIGEN_STRONG_INLINE int predux_min(const Packet16i& a) {
  return _mm512_reduce_min_epi32(a);
}

template <>
EIGEN_STRONG_INLINE int predux_max(const Packet16i& a) {
  return _mm512_reduce_max_epi32(a);
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16i& a) {
  return _mm512_reduce_or_epi32(a) != 0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8l -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE int64_t predux(const Packet8l& a) {
  return _mm512_reduce_add_epi64(a);
}

#if EIGEN_COMP_MSVC
// MSVC's _mm512_reduce_mul_epi64 is borked, at least up to and including 1939.
//   alignas(64) int64_t data[] = { 1,1,-1,-1,1,-1,-1,-1 };
//   int64_t out = _mm512_reduce_mul_epi64(_mm512_load_epi64(data));
// produces garbage: 4294967295. It seems to happen whenever the output is supposed to be negative.
// Fall back to a manual approach:
template <>
EIGEN_STRONG_INLINE int64_t predux_mul(const Packet8l& a) {
  Packet4l lane0 = _mm512_extracti64x4_epi64(a, 0);
  Packet4l lane1 = _mm512_extracti64x4_epi64(a, 1);
  return predux_mul(pmul(lane0, lane1));
}
#else
template <>
EIGEN_STRONG_INLINE int64_t predux_mul<Packet8l>(const Packet8l& a) {
  return _mm512_reduce_mul_epi64(a);
}
#endif

template <>
EIGEN_STRONG_INLINE int64_t predux_min(const Packet8l& a) {
  return _mm512_reduce_min_epi64(a);
}

template <>
EIGEN_STRONG_INLINE int64_t predux_max(const Packet8l& a) {
  return _mm512_reduce_max_epi64(a);
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8l& a) {
  return _mm512_reduce_or_epi64(a) != 0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet16f -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE float predux(const Packet16f& a) {
  return _mm512_reduce_add_ps(a);
}

template <>
EIGEN_STRONG_INLINE float predux_mul(const Packet16f& a) {
  return _mm512_reduce_mul_ps(a);
}

template <>
EIGEN_STRONG_INLINE float predux_min(const Packet16f& a) {
  return _mm512_reduce_min_ps(a);
}

template <>
EIGEN_STRONG_INLINE float predux_min<PropagateNumbers>(const Packet16f& a) {
  Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
  Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
  return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lane0, lane1));
}

template <>
EIGEN_STRONG_INLINE float predux_min<PropagateNaN>(const Packet16f& a) {
  Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
  Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
  return predux_min<PropagateNaN>(pmin<PropagateNaN>(lane0, lane1));
}

template <>
EIGEN_STRONG_INLINE float predux_max(const Packet16f& a) {
  return _mm512_reduce_max_ps(a);
}

template <>
EIGEN_STRONG_INLINE float predux_max<PropagateNumbers>(const Packet16f& a) {
  Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
  Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
  return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lane0, lane1));
}

template <>
EIGEN_STRONG_INLINE float predux_max<PropagateNaN>(const Packet16f& a) {
  Packet8f lane0 = _mm512_extractf32x8_ps(a, 0);
  Packet8f lane1 = _mm512_extractf32x8_ps(a, 1);
  return predux_max<PropagateNaN>(pmax<PropagateNaN>(lane0, lane1));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16f& a) {
  return _mm512_reduce_or_epi32(_mm512_castps_si512(a)) != 0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet8d -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE double predux(const Packet8d& a) {
  return _mm512_reduce_add_pd(a);
}

template <>
EIGEN_STRONG_INLINE double predux_mul(const Packet8d& a) {
  return _mm512_reduce_mul_pd(a);
}

template <>
EIGEN_STRONG_INLINE double predux_min(const Packet8d& a) {
  return _mm512_reduce_min_pd(a);
}

template <>
EIGEN_STRONG_INLINE double predux_min<PropagateNumbers>(const Packet8d& a) {
  Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
  Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
  return predux_min<PropagateNumbers>(pmin<PropagateNumbers>(lane0, lane1));
}

template <>
EIGEN_STRONG_INLINE double predux_min<PropagateNaN>(const Packet8d& a) {
  Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
  Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
  return predux_min<PropagateNaN>(pmin<PropagateNaN>(lane0, lane1));
}

template <>
EIGEN_STRONG_INLINE double predux_max(const Packet8d& a) {
  return _mm512_reduce_max_pd(a);
}

template <>
EIGEN_STRONG_INLINE double predux_max<PropagateNumbers>(const Packet8d& a) {
  Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
  Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
  return predux_max<PropagateNumbers>(pmax<PropagateNumbers>(lane0, lane1));
}

template <>
EIGEN_STRONG_INLINE double predux_max<PropagateNaN>(const Packet8d& a) {
  Packet4d lane0 = _mm512_extractf64x4_pd(a, 0);
  Packet4d lane1 = _mm512_extractf64x4_pd(a, 1);
  return predux_max<PropagateNaN>(pmax<PropagateNaN>(lane0, lane1));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet8d& a) {
  return _mm512_reduce_or_epi64(_mm512_castpd_si512(a)) != 0;
}

#ifndef EIGEN_VECTORIZE_AVX512FP16
/* -- -- -- -- -- -- -- -- -- -- -- -- Packet16h -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE half predux(const Packet16h& from) {
  return half(predux(half2float(from)));
}

template <>
EIGEN_STRONG_INLINE half predux_mul(const Packet16h& from) {
  return half(predux_mul(half2float(from)));
}

template <>
EIGEN_STRONG_INLINE half predux_min(const Packet16h& from) {
  return half(predux_min(half2float(from)));
}

template <>
EIGEN_STRONG_INLINE half predux_min<PropagateNumbers>(const Packet16h& from) {
  return half(predux_min<PropagateNumbers>(half2float(from)));
}

template <>
EIGEN_STRONG_INLINE half predux_min<PropagateNaN>(const Packet16h& from) {
  return half(predux_min<PropagateNaN>(half2float(from)));
}

template <>
EIGEN_STRONG_INLINE half predux_max(const Packet16h& from) {
  return half(predux_max(half2float(from)));
}

template <>
EIGEN_STRONG_INLINE half predux_max<PropagateNumbers>(const Packet16h& from) {
  return half(predux_max<PropagateNumbers>(half2float(from)));
}

template <>
EIGEN_STRONG_INLINE half predux_max<PropagateNaN>(const Packet16h& from) {
  return half(predux_max<PropagateNaN>(half2float(from)));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16h& a) {
  return predux_any<Packet8i>(a.m_val);
}
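// Editor's note: delegating to predux_any<Packet8i> works because Packet16h wraps its sixteen
// half lanes in a single __m256i (m_val), and boolean packets are all-zeros/all-ones bit
// patterns, so "any lane true" reduces to "any bit set" regardless of element width.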
#endif

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet16bf -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE bfloat16 predux(const Packet16bf& from) {
  return static_cast<bfloat16>(predux<Packet16f>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_mul(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_mul<Packet16f>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_min(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_min<Packet16f>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNumbers>(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_min<PropagateNumbers>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_min<PropagateNaN>(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_min<PropagateNaN>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_max(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_max(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNumbers>(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_max<PropagateNumbers>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bfloat16 predux_max<PropagateNaN>(const Packet16bf& from) {
  return static_cast<bfloat16>(predux_max<PropagateNaN>(Bf16ToF32(from)));
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16bf& a) {
  return predux_any<Packet8i>(a.m_val);
}

} // end namespace internal
} // end namespace Eigen

#endif // EIGEN_REDUCTIONS_AVX512_H
@ -129,30 +129,20 @@ EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a)
}

#ifdef EIGEN_VECTORIZE_VSX
// VSX support varies between different compilers and even different
// versions of the same compiler. For gcc version >= 4.9.3, we can use
// vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
// a slow version that works with older compilers.
// Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles
// are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963
template <>
inline Packet2l pcast<Packet2d, Packet2l>(const Packet2d& x) {
#if EIGEN_GNUC_STRICT_AT_LEAST(7, 1, 0)
  return vec_cts(x, 0);  // TODO: check clang version.
#else
  double tmp[2];
  memcpy(tmp, &x, sizeof(tmp));
  Packet2l l = {static_cast<long long>(tmp[0]), static_cast<long long>(tmp[1])};
  return l;
#endif
  EIGEN_ALIGN_MAX double dtmp[2];
  pstore(dtmp, x);
  EIGEN_ALIGN_MAX long long itmp[2] = {static_cast<long long>(dtmp[0]), static_cast<long long>(dtmp[1])};
  return vec_xl(0, itmp);
}

template <>
inline Packet2d pcast<Packet2l, Packet2d>(const Packet2l& x) {
  unsigned long long tmp[2];
  memcpy(tmp, &x, sizeof(tmp));
  Packet2d d = {static_cast<double>(tmp[0]), static_cast<double>(tmp[1])};
  return d;
  EIGEN_ALIGN_MAX long long itmp[2];
  vec_xst(x, 0, itmp);
  EIGEN_ALIGN_MAX double dtmp[2] = {static_cast<double>(itmp[0]), static_cast<double>(itmp[1])};
  return pload<Packet2d>(dtmp);
}
#endif

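// Editor's note (a sketch of the design choice, not part of the patch): the new pcast bodies
// round-trip through aligned scratch arrays -- pstore/vec_xst, a per-element static_cast, then
// vec_xl/pload -- which sidesteps the buggy 64-bit vec_cts/vec_ctf intrinsics entirely instead
// of gating them behind compiler-version checks as the removed code did.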
@ -1689,7 +1689,8 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet phypot_complex(const
}

template <typename Packet>
struct psign_impl<Packet, std::enable_if_t<!NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
struct psign_impl<Packet, std::enable_if_t<!is_scalar<Packet>::value &&
                                           !NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
                                           !NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>> {
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) {
    using Scalar = typename unpacket_traits<Packet>::type;
@ -1705,7 +1706,8 @@ struct psign_impl<Packet, std::enable_if_t<!NumTraits<typename unpacket_traits<P
};

template <typename Packet>
struct psign_impl<Packet, std::enable_if_t<!NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
struct psign_impl<Packet, std::enable_if_t<!is_scalar<Packet>::value &&
                                           !NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
                                           NumTraits<typename unpacket_traits<Packet>::type>::IsSigned &&
                                           NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>> {
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) {
@ -1724,7 +1726,8 @@ struct psign_impl<Packet, std::enable_if_t<!NumTraits<typename unpacket_traits<P
};

template <typename Packet>
struct psign_impl<Packet, std::enable_if_t<!NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
struct psign_impl<Packet, std::enable_if_t<!is_scalar<Packet>::value &&
                                           !NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
                                           !NumTraits<typename unpacket_traits<Packet>::type>::IsSigned &&
                                           NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>> {
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) {
@ -1739,7 +1742,8 @@ struct psign_impl<Packet, std::enable_if_t<!NumTraits<typename unpacket_traits<P

// \internal \returns the sign of a complex number z, defined as z / abs(z).
template <typename Packet>
struct psign_impl<Packet, std::enable_if_t<NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
struct psign_impl<Packet, std::enable_if_t<!is_scalar<Packet>::value &&
                                           NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
                                           unpacket_traits<Packet>::vectorizable>> {
  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) {
    typedef typename unpacket_traits<Packet>::type Scalar;
@ -2176,7 +2180,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, c

// Generic implementation of pow(x,y).
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(const Packet& x, const Packet& y) {
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS std::enable_if_t<!is_scalar<Packet>::value, Packet> generic_pow(
    const Packet& x, const Packet& y) {
  typedef typename unpacket_traits<Packet>::type Scalar;

  const Packet cst_inf = pset1<Packet>(NumTraits<Scalar>::infinity());
@ -2266,6 +2271,12 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(const Pac
  return pow;
}

template <typename Scalar>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS std::enable_if_t<is_scalar<Scalar>::value, Scalar> generic_pow(
    const Scalar& x, const Scalar& y) {
  return numext::pow(x, y);
}
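// Editor's note: a minimal sketch (hypothetical names) of the enable_if dispatch used above.
// Exactly one overload is viable for any T, so packet and scalar calls resolve at compile time:
//   template <typename T>
//   std::enable_if_t<!is_scalar<T>::value, T> f(const T& x) { return x; }  // packet path
//   template <typename T>
//   std::enable_if_t<is_scalar<T>::value, T> f(const T& x) { return x; }   // scalar path
// With this shape, generic_pow(Packet4f{...}, ...) and generic_pow(2.f, 3.f) pick different
// implementations without any runtime branching.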

namespace unary_pow {

template <typename ScalarExponent, bool IsInteger = NumTraits<ScalarExponent>::IsInteger>
@ -2347,35 +2358,36 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet int_pow(const Packet& x, const Scal
}

template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet gen_pow(const Packet& x,
                                                     const typename unpacket_traits<Packet>::type& exponent) {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<!is_scalar<Packet>::value, Packet> gen_pow(
    const Packet& x, const typename unpacket_traits<Packet>::type& exponent) {
  const Packet exponent_packet = pset1<Packet>(exponent);
  return generic_pow_impl(x, exponent_packet);
}

template <typename Scalar>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::enable_if_t<is_scalar<Scalar>::value, Scalar> gen_pow(
    const Scalar& x, const Scalar& exponent) {
  return numext::pow(x, exponent);
}

template <typename Packet, typename ScalarExponent>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet handle_nonint_nonint_errors(const Packet& x, const Packet& powx,
                                                                         const ScalarExponent& exponent) {
  using Scalar = typename unpacket_traits<Packet>::type;

  // non-integer base and exponent case

  const Scalar pos_zero = Scalar(0);
  const Scalar all_ones = ptrue<Scalar>(Scalar());
  const Scalar pos_one = Scalar(1);
  const Scalar pos_inf = NumTraits<Scalar>::infinity();

  const Packet cst_pos_zero = pzero(x);
  const Packet cst_pos_one = pset1<Packet>(pos_one);
  const Packet cst_pos_inf = pset1<Packet>(pos_inf);
  const Packet cst_pos_one = pset1<Packet>(Scalar(1));
  const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity());
  const Packet cst_true = ptrue<Packet>(x);

  const bool exponent_is_not_fin = !(numext::isfinite)(exponent);
  const bool exponent_is_neg = exponent < ScalarExponent(0);
  const bool exponent_is_pos = exponent > ScalarExponent(0);

  const Packet exp_is_not_fin = pset1<Packet>(exponent_is_not_fin ? all_ones : pos_zero);
  const Packet exp_is_neg = pset1<Packet>(exponent_is_neg ? all_ones : pos_zero);
  const Packet exp_is_pos = pset1<Packet>(exponent_is_pos ? all_ones : pos_zero);
  const Packet exp_is_not_fin = exponent_is_not_fin ? cst_true : cst_pos_zero;
  const Packet exp_is_neg = exponent_is_neg ? cst_true : cst_pos_zero;
  const Packet exp_is_pos = exponent_is_pos ? cst_true : cst_pos_zero;
  const Packet exp_is_inf = pand(exp_is_not_fin, por(exp_is_neg, exp_is_pos));
  const Packet exp_is_nan = pandnot(exp_is_not_fin, por(exp_is_neg, exp_is_pos));

@ -2411,22 +2423,15 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet handle_negative_exponent(const Pack

  // This routine handles negative exponents.
  // The return value is either 0, 1, or -1.

  const Scalar pos_zero = Scalar(0);
  const Scalar all_ones = ptrue<Scalar>(Scalar());
  const Scalar pos_one = Scalar(1);

  const Packet cst_pos_one = pset1<Packet>(pos_one);

  const Packet cst_pos_one = pset1<Packet>(Scalar(1));
  const bool exponent_is_odd = exponent % ScalarExponent(2) != ScalarExponent(0);

  const Packet exp_is_odd = pset1<Packet>(exponent_is_odd ? all_ones : pos_zero);
  const Packet exp_is_odd = exponent_is_odd ? ptrue<Packet>(x) : pzero<Packet>(x);

  const Packet abs_x = pabs(x);
  const Packet abs_x_is_one = pcmp_eq(abs_x, cst_pos_one);

  Packet result = pselect(exp_is_odd, x, abs_x);
  result = pand(abs_x_is_one, result);
  result = pselect(abs_x_is_one, result, pzero<Packet>(x));
  return result;
}
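// Editor's note (worked examples, derived from the code above): the only nonzero results this
// helper produces come from |x| == 1 lanes -- an odd exponent keeps the sign of x, an even one
// drops it -- and every |x| != 1 lane is zeroed here; e.g. it maps (-1)^(-3) -> -1,
// (-1)^(-4) -> 1, and 2^(-1) -> 0.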

@ -497,16 +497,56 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator/=(half& a, const half& b) {
  a = half(float(a) / float(b));
  return a;
}

// Non-negative floating point numbers have a monotonic mapping to non-negative integers.
// This property allows floating point numbers to be reinterpreted as integers for comparisons, which is useful if there
// is no native floating point comparison operator. Floating point signedness is handled by the sign-magnitude
// representation, whereas integers typically use two's complement. Converting the bit pattern from sign-magnitude to
// two's complement allows the transformed bit patterns to be compared as signed integers. All edge cases (+/-0 and +/-
// infinity) are handled automatically, except NaN.
//
// fp16 uses 1 sign bit, 5 exponent bits, and 10 mantissa bits. The bit pattern conveys NaN when all the exponent
// bits (5) are set, and at least one mantissa bit is set. The sign bit is irrelevant for determining NaN. To check for
// NaN, clear the sign bit and check if the integral representation is greater than 0111110000000000. To test
// for non-NaN, clear the sign bit and check if the integral representation is less than or equal to 0111110000000000.

// convert sign-magnitude representation to two's complement
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC int16_t mapToSigned(uint16_t a) {
  constexpr uint16_t kAbsMask = (1 << 15) - 1;
  // If the sign bit is set, clear the sign bit and return the (integer) negation. Otherwise, return the input.
  return (a >> 15) ? -(a & kAbsMask) : a;
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool isOrdered(const half& a, const half& b) {
|
||||
constexpr uint16_t kInf = ((1 << 5) - 1) << 10;
|
||||
constexpr uint16_t kAbsMask = (1 << 15) - 1;
|
||||
return numext::maxi(a.x & kAbsMask, b.x & kAbsMask) <= kInf;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator==(const half& a, const half& b) {
|
||||
return numext::equal_strict(float(a), float(b));
|
||||
bool result = mapToSigned(a.x) == mapToSigned(b.x);
|
||||
result &= isOrdered(a, b);
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator!=(const half& a, const half& b) {
|
||||
return numext::not_equal_strict(float(a), float(b));
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator!=(const half& a, const half& b) { return !(a == b); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<(const half& a, const half& b) {
|
||||
bool result = mapToSigned(a.x) < mapToSigned(b.x);
|
||||
result &= isOrdered(a, b);
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<=(const half& a, const half& b) {
|
||||
bool result = mapToSigned(a.x) <= mapToSigned(b.x);
|
||||
result &= isOrdered(a, b);
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>(const half& a, const half& b) {
|
||||
bool result = mapToSigned(a.x) > mapToSigned(b.x);
|
||||
result &= isOrdered(a, b);
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>=(const half& a, const half& b) {
|
||||
bool result = mapToSigned(a.x) >= mapToSigned(b.x);
|
||||
result &= isOrdered(a, b);
|
||||
return result;
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<(const half& a, const half& b) { return float(a) < float(b); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator<=(const half& a, const half& b) { return float(a) <= float(b); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>(const half& a, const half& b) { return float(a) > float(b); }
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator>=(const half& a, const half& b) { return float(a) >= float(b); }
|
||||
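A standalone sketch of the sign-magnitude trick (bit patterns follow the 1-5-10 fp16 layout described above; this demo helper is not part of the diff):

#include <cstdint>
// In IEEE fp16, 1.0h == 0x3C00 and -1.0h == 0xBC00.
inline int16_t map_to_signed_demo(uint16_t bits) {
  constexpr uint16_t kAbsMask = (1u << 15) - 1;
  return (bits >> 15) ? int16_t(-(bits & kAbsMask)) : int16_t(bits);
}
// map_to_signed_demo(0xBC00) == -0x3C00, so "-1.0h < 1.0h" holds as a plain
// signed integer comparison, with no fp16 comparison hardware required.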

#if EIGEN_COMP_CLANG && defined(EIGEN_GPUCC)
#pragma pop_macro("EIGEN_DEVICE_FUNC")
@ -706,7 +746,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isnan)(const half& a) {
#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool(isfinite)(const half& a) {
  return !(isinf EIGEN_NOT_A_MACRO(a)) && !(isnan EIGEN_NOT_A_MACRO(a));
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) || defined(EIGEN_HAS_BUILTIN_FLOAT16)
  return (numext::bit_cast<numext::uint16_t>(a.x) & 0x7fff) < 0x7c00;
#else
  return (a.x & 0x7fff) < 0x7c00;
#endif
}

EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {

@ -31,6 +31,15 @@ namespace internal {
#define EIGEN_GPU_HAS_FP16_ARITHMETIC 1
#endif

// We need to distinguish 'clang as the CUDA compiler' from 'clang as the host compiler,
// invoked by NVCC' (e.g. on MacOS). The former needs to see both host and device implementation
// of the functions, while the latter can only deal with one of them.
#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
#define EIGEN_HAS_GPU_DEVICE_FUNCTIONS 1
#else
#define EIGEN_HAS_GPU_DEVICE_FUNCTIONS 0
#endif
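A minimal sketch of how such a gate is typically consumed (hypothetical helper, not from the diff; __fmaf_rn is the CUDA device intrinsic):

#if EIGEN_HAS_GPU_DEVICE_FUNCTIONS
// Device-capable pass: the GPU intrinsic is visible.
EIGEN_DEVICE_FUNC inline float fused_madd(float a, float b, float c) { return __fmaf_rn(a, b, c); }
#else
// Host-only pass (e.g. clang invoked by NVCC): plain arithmetic fallback.
inline float fused_madd(float a, float b, float c) { return a * b + c; }
#endif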

// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
@ -74,7 +83,10 @@ struct packet_traits<float> : default_packet_traits {
    HasGammaSampleDerAlpha = 1,
    HasIGammac = 1,
    HasBetaInc = 1,
    HasBlend = 0

    HasBlend = 0,
    HasFloor = 1,
    HasCmp = EIGEN_HAS_GPU_DEVICE_FUNCTIONS
  };
};

@ -143,10 +155,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from)
  return make_double2(from, from);
}

// We need to distinguish 'clang as the CUDA compiler' from 'clang as the host compiler,
// invoked by NVCC' (e.g. on MacOS). The former needs to see both host and device implementation
// of the functions, while the latter can only deal with one of them.
#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
#if EIGEN_HAS_GPU_DEVICE_FUNCTIONS

EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a, const float& b) {
  return __int_as_float(__float_as_int(a) & __float_as_int(b));
@ -259,8 +268,7 @@ template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_le<double2>(const double2& a, const double2& b) {
  return make_double2(le_mask(a.x, b.x), le_mask(a.y, b.y));
}
#endif  // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG &&
        // !EIGEN_COMP_NVCC)
#endif  // EIGEN_HAS_GPU_DEVICE_FUNCTIONS

template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {

@ -1287,6 +1287,14 @@ template <>
EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c) {
  return vfma_f32(c, a, b);
}
template <>
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
  return vfmsq_f32(c, a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2f pnmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c) {
  return vfms_f32(c, a, b);
}
#else
template <>
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
@ -1296,7 +1304,31 @@ template <>
EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c) {
  return vmla_f32(c, a, b);
}
template <>
EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
  return vmlsq_f32(c, a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2f pnmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c) {
  return vmls_f32(c, a, b);
}
#endif
template <>
EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
  return pnegate(pnmadd(a, b, c));
}
template <>
EIGEN_STRONG_INLINE Packet2f pmsub(const Packet2f& a, const Packet2f& b, const Packet2f& c) {
  return pnegate(pnmadd(a, b, c));
}
template <>
EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
  return pnegate(pmadd(a, b, c));
}
template <>
EIGEN_STRONG_INLINE Packet2f pnmsub(const Packet2f& a, const Packet2f& b, const Packet2f& c) {
  return pnegate(pmadd(a, b, c));
}
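The pmsub/pnmsub bodies above are pure sign algebra over the two fused primitives. Spelled out as a sketch of the identities, using the packet-op semantics pmadd(a, b, c) = a*b + c and pnmadd(a, b, c) = c - a*b:

// pmsub(a, b, c)  =  a*b - c = -(c - a*b)  = pnegate(pnmadd(a, b, c))
// pnmsub(a, b, c) = -a*b - c = -(a*b + c)  = pnegate(pmadd(a, b, c))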

// No FMA instruction for int, so use MLA unconditionally.
template <>

@ -5242,13 +5274,28 @@ template <>
EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
  return vfmaq_f64(c, a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2d pnmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
  return vfmsq_f64(c, a, b);
}
#else
template <>
EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
  return vmlaq_f64(c, a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2d pnmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
  return vmlsq_f64(c, a, b);
}
#endif

template <>
EIGEN_STRONG_INLINE Packet2d pmsub(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
  return pnegate(pnmadd(a, b, c));
}
template <>
EIGEN_STRONG_INLINE Packet2d pnmsub(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
  return pnegate(pmadd(a, b, c));
}
template <>
EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {
  return vminq_f64(a, b);
@ -5657,18 +5704,33 @@ EIGEN_STRONG_INLINE Packet4hf pmadd(const Packet4hf& a, const Packet4hf& b, cons
}

template <>
EIGEN_STRONG_INLINE Packet8hf pmsub(const Packet8hf& a, const Packet8hf& b, const Packet8hf& c) {
  return vfmaq_f16(pnegate(c), a, b);
EIGEN_STRONG_INLINE Packet8hf pnmadd(const Packet8hf& a, const Packet8hf& b, const Packet8hf& c) {
  return vfmsq_f16(c, a, b);
}

template <>
EIGEN_STRONG_INLINE Packet4hf pnmadd(const Packet4hf& a, const Packet4hf& b, const Packet4hf& c) {
  return vfma_f16(c, pnegate(a), b);
  return vfms_f16(c, a, b);
}

template <>
EIGEN_STRONG_INLINE Packet8hf pmsub(const Packet8hf& a, const Packet8hf& b, const Packet8hf& c) {
  return pnegate(pnmadd(a, b, c));
}

template <>
EIGEN_STRONG_INLINE Packet4hf pmsub(const Packet4hf& a, const Packet4hf& b, const Packet4hf& c) {
  return pnegate(pnmadd(a, b, c));
}

template <>
EIGEN_STRONG_INLINE Packet8hf pnmsub(const Packet8hf& a, const Packet8hf& b, const Packet8hf& c) {
  return pnegate(pmadd(a, b, c));
}

template <>
EIGEN_STRONG_INLINE Packet4hf pnmsub(const Packet4hf& a, const Packet4hf& b, const Packet4hf& c) {
  return vfma_f16(pnegate(c), pnegate(a), b);
  return pnegate(pmadd(a, b, c));
}

template <>

@ -1857,220 +1857,6 @@ EIGEN_STRONG_INLINE void punpackp(Packet4f* vecs) {
  vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
}

template <>
EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
  // Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
  // (from Nehalem to Haswell)
  // #ifdef EIGEN_VECTORIZE_SSE3
  //   Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3));
  //   return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp));
  // #else
  Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a, a));
  return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp, tmp, 1)));
  // #endif
}

template <>
EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {
  // Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
  // (from Nehalem to Haswell)
  // #ifdef EIGEN_VECTORIZE_SSE3
  //   return pfirst<Packet2d>(_mm_hadd_pd(a, a));
  // #else
  return pfirst<Packet2d>(_mm_add_sd(a, _mm_unpackhi_pd(a, a)));
  // #endif
}

template <>
EIGEN_STRONG_INLINE int64_t predux<Packet2l>(const Packet2l& a) {
  return pfirst<Packet2l>(_mm_add_epi64(a, _mm_unpackhi_epi64(a, a)));
}

#ifdef EIGEN_VECTORIZE_SSSE3
template <>
EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) {
  Packet4i tmp0 = _mm_hadd_epi32(a, a);
  return pfirst<Packet4i>(_mm_hadd_epi32(tmp0, tmp0));
}
template <>
EIGEN_STRONG_INLINE uint32_t predux<Packet4ui>(const Packet4ui& a) {
  Packet4ui tmp0 = _mm_hadd_epi32(a, a);
  return pfirst<Packet4ui>(_mm_hadd_epi32(tmp0, tmp0));
}
#else
template <>
EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a) {
  Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a, a));
  return pfirst(tmp) + pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1));
}
template <>
EIGEN_STRONG_INLINE uint32_t predux<Packet4ui>(const Packet4ui& a) {
  Packet4ui tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a, a));
  return pfirst(tmp) + pfirst<Packet4ui>(_mm_shuffle_epi32(tmp, 1));
}
#endif

template <>
EIGEN_STRONG_INLINE bool predux<Packet16b>(const Packet16b& a) {
  Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a, a));
  return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0);
}

// Other reduction functions:

// mul
template <>
EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {
  Packet4f tmp = _mm_mul_ps(a, _mm_movehl_ps(a, a));
  return pfirst<Packet4f>(_mm_mul_ss(tmp, _mm_shuffle_ps(tmp, tmp, 1)));
}
template <>
EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) {
  return pfirst<Packet2d>(_mm_mul_sd(a, _mm_unpackhi_pd(a, a)));
}
template <>
EIGEN_STRONG_INLINE int64_t predux_mul<Packet2l>(const Packet2l& a) {
  EIGEN_ALIGN16 int64_t aux[2];
  pstore(aux, a);
  return aux[0] * aux[1];
}
template <>
EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a) {
  // after some experiments, it seems this is the fastest way to implement it
  // for GCC (e.g., reusing pmul is very slow!)
  // TODO try to call _mm_mul_epu32 directly
  EIGEN_ALIGN16 int aux[4];
  pstore(aux, a);
  return (aux[0] * aux[1]) * (aux[2] * aux[3]);
}
template <>
EIGEN_STRONG_INLINE uint32_t predux_mul<Packet4ui>(const Packet4ui& a) {
  // after some experiments, it seems this is the fastest way to implement it
  // for GCC (e.g., reusing pmul is very slow!)
  // TODO try to call _mm_mul_epu32 directly
  EIGEN_ALIGN16 uint32_t aux[4];
  pstore(aux, a);
  return (aux[0] * aux[1]) * (aux[2] * aux[3]);
}

template <>
EIGEN_STRONG_INLINE bool predux_mul<Packet16b>(const Packet16b& a) {
  Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a, a));
  return ((pfirst<Packet4i>(tmp) == 0x01010101) && (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) == 0x01010101));
}

// min
template <>
EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {
  Packet4f tmp = _mm_min_ps(a, _mm_movehl_ps(a, a));
  return pfirst<Packet4f>(_mm_min_ss(tmp, _mm_shuffle_ps(tmp, tmp, 1)));
}
template <>
EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) {
  return pfirst<Packet2d>(_mm_min_sd(a, _mm_unpackhi_pd(a, a)));
}
template <>
EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) {
#ifdef EIGEN_VECTORIZE_SSE4_1
  Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst<Packet4i>(_mm_min_epi32(tmp, _mm_shuffle_epi32(tmp, 1)));
#else
  // after some experiments, it seems this is the fastest way to implement it
  // for GCC (e.g., it does not like using std::min after the pstore!)
  EIGEN_ALIGN16 int aux[4];
  pstore(aux, a);
  int aux0 = aux[0] < aux[1] ? aux[0] : aux[1];
  int aux2 = aux[2] < aux[3] ? aux[2] : aux[3];
  return aux0 < aux2 ? aux0 : aux2;
#endif  // EIGEN_VECTORIZE_SSE4_1
}
template <>
EIGEN_STRONG_INLINE uint32_t predux_min<Packet4ui>(const Packet4ui& a) {
#ifdef EIGEN_VECTORIZE_SSE4_1
  Packet4ui tmp = _mm_min_epu32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst<Packet4ui>(_mm_min_epu32(tmp, _mm_shuffle_epi32(tmp, 1)));
#else
  // after some experiments, it seems this is the fastest way to implement it
  // for GCC (e.g., it does not like using std::min after the pstore!)
  EIGEN_ALIGN16 uint32_t aux[4];
  pstore(aux, a);
  uint32_t aux0 = aux[0] < aux[1] ? aux[0] : aux[1];
  uint32_t aux2 = aux[2] < aux[3] ? aux[2] : aux[3];
  return aux0 < aux2 ? aux0 : aux2;
#endif  // EIGEN_VECTORIZE_SSE4_1
}

// max
template <>
EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {
  Packet4f tmp = _mm_max_ps(a, _mm_movehl_ps(a, a));
  return pfirst<Packet4f>(_mm_max_ss(tmp, _mm_shuffle_ps(tmp, tmp, 1)));
}
template <>
EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) {
  return pfirst<Packet2d>(_mm_max_sd(a, _mm_unpackhi_pd(a, a)));
}
template <>
EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) {
#ifdef EIGEN_VECTORIZE_SSE4_1
  Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst<Packet4i>(_mm_max_epi32(tmp, _mm_shuffle_epi32(tmp, 1)));
#else
  // after some experiments, it seems this is the fastest way to implement it
  // for GCC (e.g., it does not like using std::max after the pstore!)
  EIGEN_ALIGN16 int aux[4];
  pstore(aux, a);
  int aux0 = aux[0] > aux[1] ? aux[0] : aux[1];
  int aux2 = aux[2] > aux[3] ? aux[2] : aux[3];
  return aux0 > aux2 ? aux0 : aux2;
#endif  // EIGEN_VECTORIZE_SSE4_1
}
template <>
EIGEN_STRONG_INLINE uint32_t predux_max<Packet4ui>(const Packet4ui& a) {
#ifdef EIGEN_VECTORIZE_SSE4_1
  Packet4ui tmp = _mm_max_epu32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 3, 2)));
  return pfirst<Packet4ui>(_mm_max_epu32(tmp, _mm_shuffle_epi32(tmp, 1)));
#else
  // after some experiments, it seems this is the fastest way to implement it
  // for GCC (e.g., it does not like using std::max after the pstore!)
  EIGEN_ALIGN16 uint32_t aux[4];
  pstore(aux, a);
  uint32_t aux0 = aux[0] > aux[1] ? aux[0] : aux[1];
  uint32_t aux2 = aux[2] > aux[3] ? aux[2] : aux[3];
  return aux0 > aux2 ? aux0 : aux2;
#endif  // EIGEN_VECTORIZE_SSE4_1
}

// not needed yet
// template<> EIGEN_STRONG_INLINE bool predux_all(const Packet4f& x)
// {
//   return _mm_movemask_ps(x) == 0xF;
// }

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet2d& x) {
  return _mm_movemask_pd(x) != 0x0;
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x) {
  return _mm_movemask_ps(x) != 0x0;
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet2l& x) {
  return _mm_movemask_pd(_mm_castsi128_pd(x)) != 0x0;
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4i& x) {
  return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0;
}
template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4ui& x) {
  return _mm_movemask_ps(_mm_castsi128_ps(x)) != 0x0;
}

EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
  _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
}

324
Eigen/src/Core/arch/SSE/Reductions.h
Normal file
@ -0,0 +1,324 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2025 Charlie Schlosser <cs.schlosser@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUCTIONS_SSE_H
#define EIGEN_REDUCTIONS_SSE_H

// IWYU pragma: private
#include "../../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

template <typename Packet>
struct sse_add_wrapper {
  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return padd<Packet>(a, b); }
};

template <typename Packet>
struct sse_mul_wrapper {
  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmul<Packet>(a, b); }
};

template <typename Packet>
struct sse_min_wrapper {
  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmin<Packet>(a, b); }
};

template <int NaNPropagation, typename Packet>
struct sse_min_prop_wrapper {
  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) {
    return pmin<NaNPropagation, Packet>(a, b);
  }
};

template <typename Packet>
struct sse_max_wrapper {
  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) { return pmax<Packet>(a, b); }
};

template <int NaNPropagation, typename Packet>
struct sse_max_prop_wrapper {
  static EIGEN_STRONG_INLINE Packet packetOp(const Packet& a, const Packet& b) {
    return pmax<NaNPropagation, Packet>(a, b);
  }
};

template <typename Packet, typename Op>
struct sse_predux_common;

template <typename Packet>
struct sse_predux_impl : sse_predux_common<Packet, sse_add_wrapper<Packet>> {};

template <typename Packet>
struct sse_predux_mul_impl : sse_predux_common<Packet, sse_mul_wrapper<Packet>> {};

template <typename Packet>
struct sse_predux_min_impl : sse_predux_common<Packet, sse_min_wrapper<Packet>> {};

template <int NaNPropagation, typename Packet>
struct sse_predux_min_prop_impl : sse_predux_common<Packet, sse_min_prop_wrapper<NaNPropagation, Packet>> {};

template <typename Packet>
struct sse_predux_max_impl : sse_predux_common<Packet, sse_max_wrapper<Packet>> {};

template <int NaNPropagation, typename Packet>
struct sse_predux_max_prop_impl : sse_predux_common<Packet, sse_max_prop_wrapper<NaNPropagation, Packet>> {};

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet16b -- -- -- -- -- -- -- -- -- -- -- -- */

template <>
EIGEN_STRONG_INLINE bool predux(const Packet16b& a) {
  Packet4i tmp = _mm_or_si128(a, _mm_unpackhi_epi64(a, a));
  return (pfirst(tmp) != 0) || (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) != 0);
}

template <>
EIGEN_STRONG_INLINE bool predux_mul(const Packet16b& a) {
  Packet4i tmp = _mm_and_si128(a, _mm_unpackhi_epi64(a, a));
  return ((pfirst<Packet4i>(tmp) == 0x01010101) && (pfirst<Packet4i>(_mm_shuffle_epi32(tmp, 1)) == 0x01010101));
}

template <>
EIGEN_STRONG_INLINE bool predux_min(const Packet16b& a) {
  return predux_mul(a);
}

template <>
EIGEN_STRONG_INLINE bool predux_max(const Packet16b& a) {
  return predux(a);
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet16b& a) {
  return predux(a);
}
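Worth noting for the Packet16b block above: with boolean lanes, the sum reduction degenerates to logical OR and the product reduction to logical AND, which is why predux_min, predux_max, and predux_any can simply forward to predux_mul and predux.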

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4i -- -- -- -- -- -- -- -- -- -- -- -- */

template <typename Op>
struct sse_predux_common<Packet4i, Op> {
  static EIGEN_STRONG_INLINE int run(const Packet4i& a) {
    Packet4i tmp;
    tmp = Op::packetOp(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)));
    tmp = Op::packetOp(tmp, _mm_unpackhi_epi32(tmp, tmp));
    return _mm_cvtsi128_si32(tmp);
  }
};

template <>
EIGEN_STRONG_INLINE int predux(const Packet4i& a) {
  return sse_predux_impl<Packet4i>::run(a);
}

template <>
EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) {
  return sse_predux_mul_impl<Packet4i>::run(a);
}

#ifdef EIGEN_VECTORIZE_SSE4_1
template <>
EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) {
  return sse_predux_min_impl<Packet4i>::run(a);
}

template <>
EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) {
  return sse_predux_max_impl<Packet4i>::run(a);
}
#endif

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4i& a) {
  return _mm_movemask_ps(_mm_castsi128_ps(a)) != 0x0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4ui -- -- -- -- -- -- -- -- -- -- -- -- */

template <typename Op>
struct sse_predux_common<Packet4ui, Op> {
  static EIGEN_STRONG_INLINE uint32_t run(const Packet4ui& a) {
    Packet4ui tmp;
    tmp = Op::packetOp(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 1, 2, 3)));
    tmp = Op::packetOp(tmp, _mm_unpackhi_epi32(tmp, tmp));
    return static_cast<uint32_t>(_mm_cvtsi128_si32(tmp));
  }
};

template <>
EIGEN_STRONG_INLINE uint32_t predux(const Packet4ui& a) {
  return sse_predux_impl<Packet4ui>::run(a);
}

template <>
EIGEN_STRONG_INLINE uint32_t predux_mul(const Packet4ui& a) {
  return sse_predux_mul_impl<Packet4ui>::run(a);
}

#ifdef EIGEN_VECTORIZE_SSE4_1
template <>
EIGEN_STRONG_INLINE uint32_t predux_min(const Packet4ui& a) {
  return sse_predux_min_impl<Packet4ui>::run(a);
}

template <>
EIGEN_STRONG_INLINE uint32_t predux_max(const Packet4ui& a) {
  return sse_predux_max_impl<Packet4ui>::run(a);
}
#endif

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4ui& a) {
  return _mm_movemask_ps(_mm_castsi128_ps(a)) != 0x0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet2l -- -- -- -- -- -- -- -- -- -- -- -- */

template <typename Op>
struct sse_predux_common<Packet2l, Op> {
  static EIGEN_STRONG_INLINE int64_t run(const Packet2l& a) {
    Packet2l tmp;
    tmp = Op::packetOp(a, _mm_unpackhi_epi64(a, a));
    return pfirst(tmp);
  }
};

template <>
EIGEN_STRONG_INLINE int64_t predux(const Packet2l& a) {
  return sse_predux_impl<Packet2l>::run(a);
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet2l& a) {
  return _mm_movemask_pd(_mm_castsi128_pd(a)) != 0x0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet4f -- -- -- -- -- -- -- -- -- -- -- -- */

template <typename Op>
struct sse_predux_common<Packet4f, Op> {
  static EIGEN_STRONG_INLINE float run(const Packet4f& a) {
    Packet4f tmp;
    tmp = Op::packetOp(a, _mm_movehl_ps(a, a));
#ifdef EIGEN_VECTORIZE_SSE3
    tmp = Op::packetOp(tmp, _mm_movehdup_ps(tmp));
#else
    tmp = Op::packetOp(tmp, _mm_shuffle_ps(tmp, tmp, 1));
#endif
    return _mm_cvtss_f32(tmp);
  }
};
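The shuffle sequence in sse_predux_common<Packet4f, Op> is the classic log2(N) horizontal reduction. Instantiated with addition, it boils down to this standalone sketch (illustration only, not part of the header):

#include <immintrin.h>
float horizontal_add_sse(__m128 a) {
  __m128 tmp = _mm_add_ps(a, _mm_movehl_ps(a, a));     // {a0+a2, a1+a3, ...}
  tmp = _mm_add_ss(tmp, _mm_shuffle_ps(tmp, tmp, 1));  // (a0+a2) + (a1+a3)
  return _mm_cvtss_f32(tmp);                           // extract the scalar result
}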

template <>
EIGEN_STRONG_INLINE float predux(const Packet4f& a) {
  return sse_predux_impl<Packet4f>::run(a);
}

template <>
EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) {
  return sse_predux_mul_impl<Packet4f>::run(a);
}

template <>
EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) {
  return sse_predux_min_impl<Packet4f>::run(a);
}

template <>
EIGEN_STRONG_INLINE float predux_min<PropagateNumbers>(const Packet4f& a) {
  return sse_predux_min_prop_impl<PropagateNumbers, Packet4f>::run(a);
}

template <>
EIGEN_STRONG_INLINE float predux_min<PropagateNaN>(const Packet4f& a) {
  return sse_predux_min_prop_impl<PropagateNaN, Packet4f>::run(a);
}

template <>
EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) {
  return sse_predux_max_impl<Packet4f>::run(a);
}

template <>
EIGEN_STRONG_INLINE float predux_max<PropagateNumbers>(const Packet4f& a) {
  return sse_predux_max_prop_impl<PropagateNumbers, Packet4f>::run(a);
}

template <>
EIGEN_STRONG_INLINE float predux_max<PropagateNaN>(const Packet4f& a) {
  return sse_predux_max_prop_impl<PropagateNaN, Packet4f>::run(a);
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet4f& a) {
  return _mm_movemask_ps(a) != 0x0;
}

/* -- -- -- -- -- -- -- -- -- -- -- -- Packet2d -- -- -- -- -- -- -- -- -- -- -- -- */

template <typename Op>
struct sse_predux_common<Packet2d, Op> {
  static EIGEN_STRONG_INLINE double run(const Packet2d& a) {
    Packet2d tmp;
    tmp = Op::packetOp(a, _mm_unpackhi_pd(a, a));
    return _mm_cvtsd_f64(tmp);
  }
};

template <>
EIGEN_STRONG_INLINE double predux(const Packet2d& a) {
  return sse_predux_impl<Packet2d>::run(a);
}

template <>
EIGEN_STRONG_INLINE double predux_mul(const Packet2d& a) {
  return sse_predux_mul_impl<Packet2d>::run(a);
}

template <>
EIGEN_STRONG_INLINE double predux_min(const Packet2d& a) {
  return sse_predux_min_impl<Packet2d>::run(a);
}

template <>
EIGEN_STRONG_INLINE double predux_min<PropagateNumbers>(const Packet2d& a) {
  return sse_predux_min_prop_impl<PropagateNumbers, Packet2d>::run(a);
}

template <>
EIGEN_STRONG_INLINE double predux_min<PropagateNaN>(const Packet2d& a) {
  return sse_predux_min_prop_impl<PropagateNaN, Packet2d>::run(a);
}

template <>
EIGEN_STRONG_INLINE double predux_max(const Packet2d& a) {
  return sse_predux_max_impl<Packet2d>::run(a);
}

template <>
EIGEN_STRONG_INLINE double predux_max<PropagateNumbers>(const Packet2d& a) {
  return sse_predux_max_prop_impl<PropagateNumbers, Packet2d>::run(a);
}

template <>
EIGEN_STRONG_INLINE double predux_max<PropagateNaN>(const Packet2d& a) {
  return sse_predux_max_prop_impl<PropagateNaN, Packet2d>::run(a);
}

template <>
EIGEN_STRONG_INLINE bool predux_any(const Packet2d& a) {
  return _mm_movemask_pd(a) != 0x0;
}

}  // end namespace internal

}  // end namespace Eigen

#endif  // EIGEN_REDUCTIONS_SSE_H

@ -55,7 +55,7 @@ namespace internal {
      ConjugateRhs, ColMajor, 1> { \
    typedef gebp_traits<EIGTYPE, EIGTYPE> Traits; \
 \
    static void run(Index rows, Index cols, Index depth, const EIGTYPE* _lhs, Index lhsStride, const EIGTYPE* _rhs, \
    static void run(Index rows, Index cols, Index depth, const EIGTYPE* lhs_, Index lhsStride, const EIGTYPE* rhs_, \
                    Index rhsStride, EIGTYPE* res, Index resIncr, Index resStride, EIGTYPE alpha, \
                    level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/, GemmParallelInfo<Index>* /*info = 0*/) { \
      using std::conj; \
@ -84,20 +84,20 @@ namespace internal {
 \
      /* Set a, b, c */ \
      if ((LhsStorageOrder == ColMajor) && (ConjugateLhs)) { \
        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs, m, k, OuterStride<>(lhsStride)); \
        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(lhs_, m, k, OuterStride<>(lhsStride)); \
        a_tmp = lhs.conjugate(); \
        a = a_tmp.data(); \
        lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
      } else \
        a = _lhs; \
        a = lhs_; \
 \
      if ((RhsStorageOrder == ColMajor) && (ConjugateRhs)) { \
        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs, k, n, OuterStride<>(rhsStride)); \
        Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(rhs_, k, n, OuterStride<>(rhsStride)); \
        b_tmp = rhs.conjugate(); \
        b = b_tmp.data(); \
        ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
      } else \
        b = _rhs; \
        b = rhs_; \
 \
      BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, \
               (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
@ -116,6 +116,88 @@ GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_)
#endif

// If OpenBLAS with BUILD_BFLOAT16=1 support is available,
// use sbgemm for bfloat16.
#if EIGEN_USE_OPENBLAS_BFLOAT16

extern "C" {
// OpenBLAS prototype.
void sbgemm_(const char* trans_a, const char* trans_b, const int* M, const int* N, const int* K, const float* alpha,
             const Eigen::bfloat16* A, const int* lda, const Eigen::bfloat16* B, const int* ldb, const float* beta,
             float* C, const int* ldc);
}  // extern "C"

template <typename Index, int LhsStorageOrder, bool ConjugateLhs, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index, Eigen::bfloat16, LhsStorageOrder, ConjugateLhs, Eigen::bfloat16,
                                     RhsStorageOrder, ConjugateRhs, ColMajor, 1> {
  typedef gebp_traits<Eigen::bfloat16, Eigen::bfloat16> Traits;

  static void run(Index rows, Index cols, Index depth, const Eigen::bfloat16* lhs_, Index lhsStride,
                  const Eigen::bfloat16* rhs_, Index rhsStride, Eigen::bfloat16* res, Index resIncr, Index resStride,
                  Eigen::bfloat16 alpha, level3_blocking<Eigen::bfloat16, Eigen::bfloat16>& /*blocking*/,
                  GemmParallelInfo<Index>* /*info = 0*/) {
    using std::conj;
    if (rows == 0 || cols == 0 || depth == 0) return;
    EIGEN_ONLY_USED_FOR_DEBUG(resIncr);
    eigen_assert(resIncr == 1);
    char transa, transb;
    BlasIndex m, n, k, lda, ldb, ldc;
    const Eigen::bfloat16 *a, *b;

    float falpha = static_cast<float>(alpha);
    float fbeta = float(1.0);

    using MatrixXbf16 = Matrix<Eigen::bfloat16, Dynamic, Dynamic>;
    MatrixXbf16 a_tmp, b_tmp;
    MatrixXf r_tmp;

    /* Set transpose options */
    transa = (LhsStorageOrder == RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N';
    transb = (RhsStorageOrder == RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N';

    /* Set m, n, k */
    m = convert_index<BlasIndex>(rows);
    n = convert_index<BlasIndex>(cols);
    k = convert_index<BlasIndex>(depth);

    /* Set lda, ldb, ldc */
    lda = convert_index<BlasIndex>(lhsStride);
    ldb = convert_index<BlasIndex>(rhsStride);
    ldc = convert_index<BlasIndex>(m);

    /* Set a, b, c */
    if ((LhsStorageOrder == ColMajor) && (ConjugateLhs)) {
      Map<const MatrixXbf16, 0, OuterStride<> > lhs(lhs_, m, k, OuterStride<>(lhsStride));
      a_tmp = lhs.conjugate();
      a = a_tmp.data();
      lda = convert_index<BlasIndex>(a_tmp.outerStride());
    } else {
      a = lhs_;
    }

    if ((RhsStorageOrder == ColMajor) && (ConjugateRhs)) {
      Map<const MatrixXbf16, 0, OuterStride<> > rhs(rhs_, k, n, OuterStride<>(rhsStride));
      b_tmp = rhs.conjugate();
      b = b_tmp.data();
      ldb = convert_index<BlasIndex>(b_tmp.outerStride());
    } else {
      b = rhs_;
    }

    // Evaluate to a temporary intermediate array.
    r_tmp.resize(m, n);

    sbgemm_(&transa, &transb, &m, &n, &k, (const float*)&numext::real_ref(falpha), a, &lda, b, &ldb,
            (const float*)&numext::real_ref(fbeta), r_tmp.data(), &ldc);

    // Cast to the output.
    Map<MatrixXbf16, 0, OuterStride<> > result(res, m, n, OuterStride<>(resStride));
    result = r_tmp.cast<Eigen::bfloat16>();
  }
};

#endif  // EIGEN_USE_OPENBLAS_BFLOAT16
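Hedged usage sketch for the path above: assuming OpenBLAS was built with BUILD_BFLOAT16=1 and the application defines EIGEN_USE_BLAS plus EIGEN_USE_OPENBLAS_BFLOAT16 before including Eigen, a plain bfloat16 product dispatches to sbgemm_, accumulating in float before casting back:

#include <Eigen/Dense>
using BMat = Eigen::Matrix<Eigen::bfloat16, Eigen::Dynamic, Eigen::Dynamic>;
BMat multiply_bf16(const BMat& A, const BMat& B) {
  return A * B;  // accumulates in float via sbgemm_, then casts the result to bfloat16
}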

}  // namespace internal

}  // end namespace Eigen

@ -164,6 +164,11 @@ struct selfadjoint_product_impl<Lhs, LhsMode, false, Rhs, 0, true> {

  enum { LhsUpLo = LhsMode & (Upper | Lower) };

  // Verify that the Rhs is a vector in the correct orientation.
  // Otherwise, we break the assumption that we are multiplying
  // MxN * Nx1.
  static_assert(Rhs::ColsAtCompileTime == 1, "The RHS must be a column vector.");

  template <typename Dest>
  static EIGEN_DEVICE_FUNC void run(Dest& dest, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) {
    typedef typename Dest::Scalar ResScalar;

@ -8,7 +8,7 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H)
#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CORE_GPU_HIP_CUDA_DEFINES_H)
#define EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H
#define EIGEN_CORE_GPU_HIP_CUDA_DEFINES_H

// Note that we are using EIGEN_USE_HIP here instead of EIGEN_HIPCC...this is by design

@ -98,4 +98,4 @@

#endif  // gpu_assert

#endif  // EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H
#endif  // EIGEN_CORE_GPU_HIP_CUDA_DEFINES_H

@ -8,7 +8,7 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#if defined(EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H)
#if defined(EIGEN_CORE_GPU_HIP_CUDA_DEFINES_H)

#ifndef EIGEN_PERMANENTLY_ENABLE_GPU_HIP_CUDA_DEFINES

@ -40,6 +40,6 @@

#endif  // EIGEN_PERMANENTLY_ENABLE_GPU_HIP_CUDA_DEFINES

#undef EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H
#undef EIGEN_CORE_GPU_HIP_CUDA_DEFINES_H

#endif  // EIGEN_CXX11_TENSOR_GPU_HIP_CUDA_DEFINES_H
#endif  // EIGEN_CORE_GPU_HIP_CUDA_DEFINES_H

@ -762,7 +762,7 @@ void swap(scoped_array<T>& a, scoped_array<T>& b) {
 * This is accomplished through alloca if the latter is supported and if the required number of bytes
 * is below EIGEN_STACK_ALLOCATION_LIMIT.
 */
#ifdef EIGEN_ALLOCA
#if defined(EIGEN_ALLOCA) && !defined(EIGEN_NO_ALLOCA)

#if EIGEN_DEFAULT_ALIGN_BYTES > 0
// We always manually re-align the result of EIGEN_ALLOCA.
@ -785,14 +785,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* pt
#define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
#endif

#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
  Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
  TYPE* NAME = (BUFFER) != 0 ? (BUFFER) \
                             : reinterpret_cast<TYPE*>((sizeof(TYPE) * SIZE <= EIGEN_STACK_ALLOCATION_LIMIT) \
                                                           ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE) * SIZE) \
                                                           : Eigen::internal::aligned_malloc(sizeof(TYPE) * SIZE)); \
  Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
      (BUFFER) == 0 ? NAME : 0, SIZE, sizeof(TYPE) * SIZE > EIGEN_STACK_ALLOCATION_LIMIT)
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
  Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
  TYPE* NAME = (BUFFER) != 0 ? (BUFFER) \
                             : reinterpret_cast<TYPE*>((sizeof(TYPE) * (SIZE) <= EIGEN_STACK_ALLOCATION_LIMIT) \
                                                           ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE) * (SIZE)) \
                                                           : Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
  Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
      (BUFFER) == 0 ? NAME : 0, SIZE, sizeof(TYPE) * (SIZE) > EIGEN_STACK_ALLOCATION_LIMIT)
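The extra parentheses around SIZE are the whole point of this hunk: without them, an expression argument binds incorrectly inside the expansion. A sketch of the failure mode with a hypothetical call using SIZE = n + 1:

// Old expansion:  sizeof(TYPE) * n + 1      // parsed as (sizeof(TYPE) * n) + 1
// New expansion:  sizeof(TYPE) * (n + 1)    // the intended byte count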

#define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \
  Eigen::internal::local_nested_eval_wrapper<XPR_T, N> EIGEN_CAT(NAME, _wrapper)( \
@ -805,10 +805,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* pt

#else

#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
  Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
  TYPE* NAME = (BUFFER) != 0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE) * SIZE)); \
  Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER) \
  Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
  TYPE* NAME = \
      (BUFFER) != 0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE) * (SIZE))); \
  Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME, _stack_memory_destructor)( \
      (BUFFER) == 0 ? NAME : 0, SIZE, true)

#define ei_declare_local_nested_eval(XPR_T, XPR, N, NAME) \

@ -345,7 +345,7 @@ EIGEN_DEVICE_FUNC void tridiagonalization_inplace(MatrixType& matA, CoeffVectorT

  // Apply similarity transformation to remaining columns,
  // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).tail(n-i-1)
  matA.col(i).coeffRef(i + 1) = (RealScalar)1;
  matA.col(i).coeffRef(i + 1) = Scalar(1);

  hCoeffs.tail(n - i - 1).noalias() =
      (matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView<Lower>() *

@ -85,6 +85,29 @@ class QuaternionBase : public RotationBase<Derived, 3> {
    return derived().coeffs();
  }

  /** \returns a vector containing the coefficients, rearranged into the order [\c w, \c x, \c y, \c z].
   *
   * This is the order expected by the \code Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar&
   * z) \endcode constructor, but not the order of the internal vector representation. Therefore, it returns a newly
   * constructed vector.
   *
   * \sa QuaternionBase::coeffsScalarLast()
   * */
  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients coeffsScalarFirst() const {
    return derived().coeffsScalarFirst();
  }

  /** \returns a vector containing the coefficients in their original order [\c x, \c y, \c z, \c w].
   *
   * This is equivalent to \code coeffs() \endcode, but returns a newly constructed vector for uniformity with \code
   * coeffsScalarFirst() \endcode.
   *
   * \sa QuaternionBase::coeffsScalarFirst()
   * */
  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients coeffsScalarLast() const {
    return derived().coeffsScalarLast();
  }

  /** \returns a vector expression of the coefficients (x,y,z,w) */
  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }

@ -357,12 +380,23 @@ class Quaternion : public QuaternionBase<Quaternion<Scalar_, Options_> > {

  EIGEN_DEVICE_FUNC static Quaternion UnitRandom();

  EIGEN_DEVICE_FUNC static Quaternion FromCoeffsScalarLast(const Scalar& x, const Scalar& y, const Scalar& z,
                                                           const Scalar& w);

  EIGEN_DEVICE_FUNC static Quaternion FromCoeffsScalarFirst(const Scalar& w, const Scalar& x, const Scalar& y,
                                                            const Scalar& z);

  template <typename Derived1, typename Derived2>
  EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);

  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }

  EIGEN_DEVICE_FUNC inline Coefficients coeffsScalarLast() const { return m_coeffs; }

  EIGEN_DEVICE_FUNC inline Coefficients coeffsScalarFirst() const {
    return {m_coeffs.w(), m_coeffs.x(), m_coeffs.y(), m_coeffs.z()};
  }
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))

#ifdef EIGEN_QUATERNION_PLUGIN
@ -437,6 +471,12 @@ class Map<const Quaternion<Scalar_>, Options_> : public QuaternionBase<Map<const

  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }

  EIGEN_DEVICE_FUNC inline Coefficients coeffsScalarLast() const { return m_coeffs; }

  EIGEN_DEVICE_FUNC inline Coefficients coeffsScalarFirst() const {
    return {m_coeffs.w(), m_coeffs.x(), m_coeffs.y(), m_coeffs.z()};
  }

 protected:
  const Coefficients m_coeffs;
};
@ -473,6 +513,12 @@ class Map<Quaternion<Scalar_>, Options_> : public QuaternionBase<Map<Quaternion<
  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }

  EIGEN_DEVICE_FUNC inline Coefficients coeffsScalarLast() const { return m_coeffs; }

  EIGEN_DEVICE_FUNC inline Coefficients coeffsScalarFirst() const {
    return {m_coeffs.w(), m_coeffs.x(), m_coeffs.y(), m_coeffs.z()};
  }

 protected:
  Coefficients m_coeffs;
};
@ -694,6 +740,35 @@ EIGEN_DEVICE_FUNC Quaternion<Scalar, Options> Quaternion<Scalar, Options>::UnitR
  return Quaternion(a * sin(u2), a * cos(u2), b * sin(u3), b * cos(u3));
}

/** Constructs a quaternion from its coefficients in the order [\c x, \c y, \c z, \c w], i.e. vector part [\c x, \c y,
 * \c z] first, scalar part \a w LAST.
 *
 * This factory accepts the parameters in the same order as the underlying coefficient vector. Consider using this
 * factory function to make the parameter ordering explicit.
 */
template <typename Scalar, int Options>
EIGEN_DEVICE_FUNC Quaternion<Scalar, Options> Quaternion<Scalar, Options>::FromCoeffsScalarLast(const Scalar& x,
                                                                                                const Scalar& y,
                                                                                                const Scalar& z,
                                                                                                const Scalar& w) {
  return Quaternion(w, x, y, z);
}

/** Constructs a quaternion from its coefficients in the order [\c w, \c x, \c y, \c z], i.e. scalar part \a w FIRST,
 * vector part [\c x, \c y, \c z] last.
 *
 * This factory accepts the parameters in the same order as the constructor \code Quaternion(const Scalar& w, const
 * Scalar& x, const Scalar& y, const Scalar& z) \endcode. Consider using this factory function to make the parameter
 * ordering explicit.
 */
template <typename Scalar, int Options>
EIGEN_DEVICE_FUNC Quaternion<Scalar, Options> Quaternion<Scalar, Options>::FromCoeffsScalarFirst(const Scalar& w,
                                                                                                 const Scalar& x,
                                                                                                 const Scalar& y,
                                                                                                 const Scalar& z) {
  return Quaternion(w, x, y, z);
}
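Quick ordering sketch for the accessors and factories above (values arbitrary):

Eigen::Quaterniond q = Eigen::Quaterniond::FromCoeffsScalarFirst(1.0, 0.0, 0.0, 0.0);  // w, x, y, z
Eigen::Vector4d wxyz = q.coeffsScalarFirst();  // [1, 0, 0, 0]
Eigen::Vector4d xyzw = q.coeffsScalarLast();   // [0, 0, 0, 1], same layout as q.coeffs()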

/** Returns a quaternion representing a rotation between
 * the two arbitrary vectors \a a and \a b. In other words, the built
 * rotation represents a rotation sending the line of direction \a a

@ -78,6 +78,17 @@ class FullPivLU : public SolverBase<FullPivLU<MatrixType_, PermutationIndex_> >
  typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime, PermutationIndex> PermutationPType;
  typedef typename MatrixType::PlainObject PlainObject;

  /** \brief Reports whether the LU factorization was successful.
   *
   * \note This function always returns \c Success. It is provided for compatibility
   * with other factorization routines.
   * \returns \c Success
   */
  ComputationInfo info() const {
    eigen_assert(m_isInitialized && "FullPivLU is not initialized.");
    return Success;
  }
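A short usage sketch for the new info() member (A and b assumed to be an invertible MatrixXd and a matching VectorXd): generic solver code can now query LU the same way it queries factorizations that may genuinely fail:

Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
if (lu.info() == Eigen::Success) {  // always true for LU; kept for API uniformity
  Eigen::VectorXd x = lu.solve(b);
}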

  /**
   * \brief Default Constructor.
   *

@ -268,7 +268,7 @@ struct Assignment<DstXprType, Inverse<XprType>,
 * \note This matrix must be invertible, otherwise the result is undefined. If you need an
 * invertibility check, do the following:
 * \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck().
 * \li for the general case, use class FullPivLU.
 * \li for the general case, use class PartialPivLU.
 *
 * Example: \include MatrixBase_inverse.cpp
 * Output: \verbinclude MatrixBase_inverse.out

@ -90,6 +90,17 @@ class PartialPivLU : public SolverBase<PartialPivLU<MatrixType_, PermutationInde
  typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime, PermutationIndex> TranspositionType;
  typedef typename MatrixType::PlainObject PlainObject;

  /** \brief Reports whether the LU factorization was successful.
   *
   * \note This function always returns \c Success. It is provided for compatibility
   * with other factorization routines.
   * \returns \c Success
   */
  ComputationInfo info() const {
    eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
    return Success;
  }

  /**
   * \brief Default Constructor.
   *

@ -82,6 +82,17 @@ class FullPivHouseholderQR : public SolverBase<FullPivHouseholderQR<MatrixType_,
  typedef typename internal::plain_col_type<MatrixType>::type ColVectorType;
  typedef typename MatrixType::PlainObject PlainObject;

  /** \brief Reports whether the QR factorization was successful.
   *
   * \note This function always returns \c Success. It is provided for compatibility
   * with other factorization routines.
   * \returns \c Success
   */
  ComputationInfo info() const {
    eigen_assert(m_isInitialized && "FullPivHouseholderQR is not initialized.");
    return Success;
  }

  /** \brief Default Constructor.
   *
   * The default constructor is useful in cases in which the user intends to

@ -75,6 +75,17 @@ class HouseholderQR : public SolverBase<HouseholderQR<MatrixType_>> {
  typedef HouseholderSequence<MatrixType, internal::remove_all_t<typename HCoeffsType::ConjugateReturnType>>
      HouseholderSequenceType;

  /** \brief Reports whether the QR factorization was successful.
   *
   * \note This function always returns \c Success. It is provided for compatibility
   * with other factorization routines.
   * \returns \c Success
   */
  ComputationInfo info() const {
    eigen_assert(m_isInitialized && "HouseholderQR is not initialized.");
    return Success;
  }

  /**
   * \brief Default Constructor.
   *

@ -165,7 +165,8 @@ class BDCSVD : public SVDBase<BDCSVD<MatrixType_, Options_> > {
   *
   * \param matrix the matrix to decompose
   */
  BDCSVD(const MatrixType& matrix) : m_algoswap(16), m_numIters(0) {
  template <typename Derived>
  BDCSVD(const MatrixBase<Derived>& matrix) : m_algoswap(16), m_numIters(0) {
    compute_impl(matrix, internal::get_computation_options(Options));
  }

@ -181,7 +182,9 @@ class BDCSVD : public SVDBase<BDCSVD<MatrixType_, Options_> > {
   * \deprecated Will be removed in the next major Eigen version. Options should
   * be specified in the \a Options template parameter.
   */
  EIGEN_DEPRECATED BDCSVD(const MatrixType& matrix, unsigned int computationOptions) : m_algoswap(16), m_numIters(0) {
  template <typename Derived>
  EIGEN_DEPRECATED BDCSVD(const MatrixBase<Derived>& matrix, unsigned int computationOptions)
      : m_algoswap(16), m_numIters(0) {
    internal::check_svd_options_assertions<MatrixType, Options>(computationOptions, matrix.rows(), matrix.cols());
    compute_impl(matrix, computationOptions);
  }
@ -193,7 +196,10 @@ class BDCSVD : public SVDBase<BDCSVD<MatrixType_, Options_> > {
   *
   * \param matrix the matrix to decompose
   */
  BDCSVD& compute(const MatrixType& matrix) { return compute_impl(matrix, m_computationOptions); }
  template <typename Derived>
  BDCSVD& compute(const MatrixBase<Derived>& matrix) {
    return compute_impl(matrix, m_computationOptions);
  }
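With the MatrixBase<Derived> overloads above, an expression can be handed to the decomposition directly (sketch; A and B assumed to be MatrixXf of compatible sizes):

Eigen::BDCSVD<Eigen::MatrixXf, Eigen::ComputeThinU | Eigen::ComputeThinV> svd(A * B);
Eigen::VectorXf sv = svd.singularValues();  // the product expression is evaluated internally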
|
||||
/** \brief Method performing the decomposition of given matrix, as specified by
|
||||
* the `computationOptions` parameter.
|
||||
@ -204,7 +210,8 @@ class BDCSVD : public SVDBase<BDCSVD<MatrixType_, Options_> > {
|
||||
* \deprecated Will be removed in the next major Eigen version. Options should
|
||||
* be specified in the \a Options template parameter.
|
||||
*/
|
||||
EIGEN_DEPRECATED BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions) {
|
||||
template <typename Derived>
|
||||
EIGEN_DEPRECATED BDCSVD& compute(const MatrixBase<Derived>& matrix, unsigned int computationOptions) {
|
||||
internal::check_svd_options_assertions<MatrixType, Options>(computationOptions, matrix.rows(), matrix.cols());
|
||||
return compute_impl(matrix, computationOptions);
|
||||
}
|
||||
@ -215,7 +222,8 @@ class BDCSVD : public SVDBase<BDCSVD<MatrixType_, Options_> > {
|
||||
}
|
||||
|
||||
private:
|
||||
BDCSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions);
|
||||
template <typename Derived>
|
||||
BDCSVD& compute_impl(const MatrixBase<Derived>& matrix, unsigned int computationOptions);
|
||||
void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
|
||||
void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
|
||||
void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals,
|
||||
@ -307,8 +315,13 @@ void BDCSVD<MatrixType, Options>::allocate(Index rows, Index cols, unsigned int
|
||||
} // end allocate
|
||||
|
||||
template <typename MatrixType, int Options>
|
||||
BDCSVD<MatrixType, Options>& BDCSVD<MatrixType, Options>::compute_impl(const MatrixType& matrix,
|
||||
template <typename Derived>
|
||||
BDCSVD<MatrixType, Options>& BDCSVD<MatrixType, Options>::compute_impl(const MatrixBase<Derived>& matrix,
|
||||
unsigned int computationOptions) {
|
||||
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived, MatrixType);
|
||||
EIGEN_STATIC_ASSERT((std::is_same<typename Derived::Scalar, typename MatrixType::Scalar>::value),
|
||||
Input matrix must have the same Scalar type as the BDCSVD object.);
|
||||
|
||||
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
|
||||
std::cout << "\n\n\n================================================================================================="
|
||||
"=====================\n\n\n";
|
||||
|
@ -58,7 +58,8 @@ class BDCSVD_LAPACKE : public BDCSVD<MatrixType_, Options> {
// construct this by moving from a parent object
BDCSVD_LAPACKE(SVD&& svd) : SVD(std::move(svd)) {}

void compute_impl_lapacke(const MatrixType& matrix, unsigned int computationOptions) {
template <typename Derived>
void compute_impl_lapacke(const MatrixBase<Derived>& matrix, unsigned int computationOptions) {
SVD::allocate(matrix.rows(), matrix.cols(), computationOptions);

SVD::m_nonzeroSingularValues = SVD::m_diagSize;

@ -120,8 +121,8 @@ class BDCSVD_LAPACKE : public BDCSVD<MatrixType_, Options> {
}
};

template <typename MatrixType_, int Options>
BDCSVD<MatrixType_, Options>& BDCSVD_wrapper(BDCSVD<MatrixType_, Options>& svd, const MatrixType_& matrix,
template <typename MatrixType_, int Options, typename Derived>
BDCSVD<MatrixType_, Options>& BDCSVD_wrapper(BDCSVD<MatrixType_, Options>& svd, const MatrixBase<Derived>& matrix,
int computationOptions) {
// we need to move to the wrapper type and back
BDCSVD_LAPACKE<MatrixType_, Options> tmpSvd(std::move(svd));

@ -134,12 +135,13 @@ BDCSVD<MatrixType_, Options>& BDCSVD_wrapper(BDCSVD<MatrixType_, Options>& svd,

} // end namespace internal

#define EIGEN_LAPACKE_SDD(EIGTYPE, EIGCOLROW, OPTIONS) \
template <> \
inline BDCSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>& \
BDCSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>::compute_impl( \
const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix, unsigned int computationOptions) { \
return internal::lapacke_helpers::BDCSVD_wrapper(*this, matrix, computationOptions); \
#define EIGEN_LAPACKE_SDD(EIGTYPE, EIGCOLROW, OPTIONS) \
template <> \
template <typename Derived> \
inline BDCSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>& \
BDCSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>::compute_impl( \
const MatrixBase<Derived>& matrix, unsigned int computationOptions) { \
return internal::lapacke_helpers::BDCSVD_wrapper(*this, matrix, computationOptions); \
}

#define EIGEN_LAPACK_SDD_OPTIONS(OPTIONS) \
@ -565,7 +565,10 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
*
* \param matrix the matrix to decompose
*/
explicit JacobiSVD(const MatrixType& matrix) { compute_impl(matrix, internal::get_computation_options(Options)); }
template <typename Derived>
explicit JacobiSVD(const MatrixBase<Derived>& matrix) {
compute_impl(matrix, internal::get_computation_options(Options));
}

/** \brief Constructor performing the decomposition of given matrix using specified options
* for computing unitaries.

@ -580,8 +583,10 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
* be specified in the \a Options template parameter.
*/
// EIGEN_DEPRECATED // TODO(cantonios): re-enable after fixing a few 3p libraries that error on deprecation warnings.
JacobiSVD(const MatrixType& matrix, unsigned int computationOptions) {
internal::check_svd_options_assertions<MatrixType, Options>(computationOptions, matrix.rows(), matrix.cols());
template <typename Derived>
JacobiSVD(const MatrixBase<Derived>& matrix, unsigned int computationOptions) {
internal::check_svd_options_assertions<MatrixBase<Derived>, Options>(computationOptions, matrix.rows(),
matrix.cols());
compute_impl(matrix, computationOptions);
}

@ -590,7 +595,10 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
*
* \param matrix the matrix to decompose
*/
JacobiSVD& compute(const MatrixType& matrix) { return compute_impl(matrix, m_computationOptions); }
template <typename Derived>
JacobiSVD& compute(const MatrixBase<Derived>& matrix) {
return compute_impl(matrix, m_computationOptions);
}

/** \brief Method performing the decomposition of given matrix, as specified by
* the `computationOptions` parameter.

@ -601,8 +609,10 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
* \deprecated Will be removed in the next major Eigen version. Options should
* be specified in the \a Options template parameter.
*/
EIGEN_DEPRECATED JacobiSVD& compute(const MatrixType& matrix, unsigned int computationOptions) {
internal::check_svd_options_assertions<MatrixType, Options>(m_computationOptions, matrix.rows(), matrix.cols());
template <typename Derived>
EIGEN_DEPRECATED JacobiSVD& compute(const MatrixBase<Derived>& matrix, unsigned int computationOptions) {
internal::check_svd_options_assertions<MatrixBase<Derived>, Options>(m_computationOptions, matrix.rows(),
matrix.cols());
return compute_impl(matrix, computationOptions);
}

@ -626,7 +636,8 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
}

private:
JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions);
template <typename Derived>
JacobiSVD& compute_impl(const MatrixBase<Derived>& matrix, unsigned int computationOptions);

protected:
using Base::m_computationOptions;

@ -664,8 +675,13 @@ class JacobiSVD : public SVDBase<JacobiSVD<MatrixType_, Options_> > {
};

template <typename MatrixType, int Options>
JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute_impl(const MatrixType& matrix,
template <typename Derived>
JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute_impl(const MatrixBase<Derived>& matrix,
unsigned int computationOptions) {
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived, MatrixType);
EIGEN_STATIC_ASSERT((std::is_same<typename Derived::Scalar, typename MatrixType::Scalar>::value),
Input matrix must have the same Scalar type as the JacobiSVD object.);

using std::abs;

allocate(matrix.rows(), matrix.cols(), computationOptions);
@ -40,65 +40,65 @@ namespace Eigen {

/** \internal Specialization for the data types supported by LAPACKe */

#define EIGEN_LAPACKE_SVD(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW, OPTIONS) \
template <> \
inline JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>& \
JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>::compute_impl( \
const Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>& matrix, unsigned int computationOptions) { \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic> MatrixType; \
/*typedef MatrixType::Scalar Scalar;*/ \
/*typedef MatrixType::RealScalar RealScalar;*/ \
allocate(matrix.rows(), matrix.cols(), computationOptions); \
\
/*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \
m_nonzeroSingularValues = diagSize(); \
\
lapack_int lda = internal::convert_index<lapack_int>(matrix.outerStride()), ldu, ldvt; \
lapack_int matrix_order = LAPACKE_COLROW; \
char jobu, jobvt; \
LAPACKE_TYPE *u, *vt, dummy; \
jobu = (m_computeFullU) ? 'A' : (m_computeThinU) ? 'S' : 'N'; \
jobvt = (m_computeFullV) ? 'A' : (m_computeThinV) ? 'S' : 'N'; \
if (computeU()) { \
ldu = internal::convert_index<lapack_int>(m_matrixU.outerStride()); \
u = (LAPACKE_TYPE*)m_matrixU.data(); \
} else { \
ldu = 1; \
u = &dummy; \
} \
MatrixType localV; \
lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(cols()) \
: (m_computeThinV) ? internal::convert_index<lapack_int>(diagSize()) \
: 1; \
if (computeV()) { \
localV.resize(vt_rows, cols()); \
ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \
vt = (LAPACKE_TYPE*)localV.data(); \
} else { \
ldvt = 1; \
vt = &dummy; \
} \
Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; \
superb.resize(diagSize(), 1); \
MatrixType m_temp; \
m_temp = matrix; \
lapack_int info = LAPACKE_##LAPACKE_PREFIX##gesvd( \
matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(rows()), \
internal::convert_index<lapack_int>(cols()), (LAPACKE_TYPE*)m_temp.data(), lda, \
(LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \
/* Check the result of the LAPACK call */ \
if (info < 0 || !m_singularValues.allFinite()) { \
m_info = InvalidInput; \
} else if (info > 0) { \
m_info = NoConvergence; \
} else { \
m_info = Success; \
if (computeV()) m_matrixV = localV.adjoint(); \
} \
/* for(int i=0;i<diagSize();i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; \
* m_singularValues.coeffRef(i)=RealScalar(0);}*/ \
m_isInitialized = true; \
return *this; \
#define EIGEN_LAPACKE_SVD(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_PREFIX, EIGCOLROW, LAPACKE_COLROW, OPTIONS) \
template <> \
template <typename Derived> \
inline JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>& \
JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, OPTIONS>::compute_impl( \
const MatrixBase<Derived>& matrix, unsigned int computationOptions) { \
/*typedef MatrixType::Scalar Scalar;*/ \
/*typedef MatrixType::RealScalar RealScalar;*/ \
allocate(matrix.rows(), matrix.cols(), computationOptions); \
\
/*const RealScalar precision = RealScalar(2) * NumTraits<Scalar>::epsilon();*/ \
m_nonzeroSingularValues = diagSize(); \
\
lapack_int lda = internal::convert_index<lapack_int>(matrix.outerStride()), ldu, ldvt; \
lapack_int matrix_order = LAPACKE_COLROW; \
char jobu, jobvt; \
LAPACKE_TYPE *u, *vt, dummy; \
jobu = (m_computeFullU) ? 'A' : (m_computeThinU) ? 'S' : 'N'; \
jobvt = (m_computeFullV) ? 'A' : (m_computeThinV) ? 'S' : 'N'; \
if (computeU()) { \
ldu = internal::convert_index<lapack_int>(m_matrixU.outerStride()); \
u = (LAPACKE_TYPE*)m_matrixU.data(); \
} else { \
ldu = 1; \
u = &dummy; \
} \
MatrixType localV; \
lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(cols()) \
: (m_computeThinV) ? internal::convert_index<lapack_int>(diagSize()) \
: 1; \
if (computeV()) { \
localV.resize(vt_rows, cols()); \
ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \
vt = (LAPACKE_TYPE*)localV.data(); \
} else { \
ldvt = 1; \
vt = &dummy; \
} \
Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; \
superb.resize(diagSize(), 1); \
MatrixType m_temp; \
m_temp = matrix; \
lapack_int info = LAPACKE_##LAPACKE_PREFIX##gesvd( \
matrix_order, jobu, jobvt, internal::convert_index<lapack_int>(rows()), \
internal::convert_index<lapack_int>(cols()), (LAPACKE_TYPE*)m_temp.data(), lda, \
(LAPACKE_RTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \
/* Check the result of the LAPACK call */ \
if (info < 0 || !m_singularValues.allFinite()) { \
m_info = InvalidInput; \
} else if (info > 0) { \
m_info = NoConvergence; \
} else { \
m_info = Success; \
if (computeV()) m_matrixV = localV.adjoint(); \
} \
/* for(int i=0;i<diagSize();i++) if (m_singularValues.coeffRef(i) < precision) { m_nonzeroSingularValues--; \
* m_singularValues.coeffRef(i)=RealScalar(0);}*/ \
m_isInitialized = true; \
return *this; \
}

#define EIGEN_LAPACK_SVD_OPTIONS(OPTIONS) \
@ -274,6 +274,10 @@ struct simpl_chol_helper {
}
};

// Symbol is ODR-used, so we need a definition.
template <typename Scalar, typename StorageIndex>
constexpr StorageIndex simpl_chol_helper<Scalar, StorageIndex>::kEmpty;

} // namespace internal

template <typename Derived>
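The out-of-line definition added above matters because, before C++17, a static constexpr data member is declared but not defined by its in-class initializer, so ODR-using it (for instance binding it to a const reference parameter) without a namespace-scope definition is a link error. A minimal illustration of the rule (hypothetical type, not from the patch):

#include <algorithm>

struct Helper {
  static constexpr int kEmpty = -1;  // in-class declaration; a definition only from C++17 on
};

// Required in C++14 if kEmpty is ODR-used, e.g. by std::max below, which
// takes its arguments by const reference.
constexpr int Helper::kEmpty;

int main() { return std::max(Helper::kEmpty, 0); }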
@ -36,10 +36,10 @@ inline typename internal::traits<Derived>::Scalar SparseMatrixBase<Derived>::dot
Scalar res1(0);
Scalar res2(0);
for (; i; ++i) {
res1 += numext::conj(i.value()) * other.coeff(i.index());
res1 = numext::fma(numext::conj(i.value()), other.coeff(i.index()), res1);
++i;
if (i) {
res2 += numext::conj(i.value()) * other.coeff(i.index());
res2 = numext::fma(numext::conj(i.value()), other.coeff(i.index()), res2);
}
}
return res1 + res2;
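The rewrite above swaps a += x * y for numext::fma(x, y, a), which contracts each multiply-add into a single rounding where hardware FMA is available, and it keeps the two independent accumulators that let consecutive FMAs overlap in the pipeline. The same pattern in a freestanding sketch (plain std::fma, illustrative only):

#include <cmath>
#include <cstddef>

// Dot product with two interleaved accumulator chains; each std::fma computes
// x*y + acc with one rounding, and the chains have no dependency on each other.
double dot(const double* x, const double* y, std::size_t n) {
  double res1 = 0.0, res2 = 0.0;
  std::size_t i = 0;
  for (; i + 1 < n; i += 2) {
    res1 = std::fma(x[i], y[i], res1);
    res2 = std::fma(x[i + 1], y[i + 1], res2);
  }
  if (i < n) res1 = std::fma(x[i], y[i], res1);
  return res1 + res2;
}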
@ -7,9 +7,7 @@
script:
- . ci/scripts/build.linux.script.sh
tags:
- linux
- eigen-runner
- cross-compiler
- saas-linux-2xlarge-amd64
rules:
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"

@ -244,11 +242,13 @@ build:linux:rocm-latest:gcc-10:
EIGEN_CI_CROSS_TARGET_TRIPLE: arm-linux-gnueabihf
EIGEN_CI_ADDITIONAL_ARGS: >
-DEIGEN_TEST_CUSTOM_CXX_FLAGS=-march=armv7-a;-mfpu=neon-vfpv4
-DCMAKE_SYSTEM_NAME=Linux
-DCMAKE_CROSSCOMPILING_EMULATOR=qemu-arm-static;-L;/usr/arm-linux-gnueabihf

build:linux:cross:arm:gcc-10:default:
extends: .build:linux:cross:arm
variables:
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf qemu-user-static
EIGEN_CI_CROSS_C_COMPILER: arm-linux-gnueabihf-gcc-10
EIGEN_CI_CROSS_CXX_COMPILER: arm-linux-gnueabihf-g++-10

@ -258,7 +258,7 @@ build:linux:cross:arm:clang-12:default:
EIGEN_CI_INSTALL: clang-12
EIGEN_CI_C_COMPILER: clang-12
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf clang-12
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf clang-12 qemu-user-static

######## aarch64 ###############################################################

@ -268,6 +268,8 @@ build:linux:cross:arm:clang-12:default:
EIGEN_CI_TARGET_ARCH: aarch64
EIGEN_CI_CROSS_TARGET_TRIPLE: aarch64-linux-gnu
EIGEN_CI_ADDITIONAL_ARGS: -DEIGEN_TEST_CUSTOM_CXX_FLAGS=-march=armv8.2-a+fp16
tags:
- saas-linux-large-arm64

build:linux:cross:aarch64:gcc-10:default:
extends: .build:linux:cross:aarch64

@ -290,28 +292,27 @@ build:linux:cross:aarch64:clang-12:default:

.build:linux:cross:ppc64le:
extends: .build:linux:cross
image: ubuntu:24.04
variables:
EIGEN_CI_TARGET_ARCH: ppc64le
EIGEN_CI_CROSS_TARGET_TRIPLE: powerpc64le-linux-gnu
EIGEN_CI_ADDITIONAL_ARGS: >-
-DCMAKE_SYSTEM_NAME=Linux
-DCMAKE_CROSSCOMPILING_EMULATOR=qemu-ppc64le-static;-L;/usr/powerpc64le-linux-gnu

build:linux:cross:ppc64le:gcc-10:default:
build:linux:cross:ppc64le:gcc-14:default:
extends: .build:linux:cross:ppc64le
variables:
EIGEN_CI_C_COMPILER: gcc-10
EIGEN_CI_CXX_COMPILER: g++-10
EIGEN_CI_CROSS_INSTALL: g++-10-powerpc64le-linux-gnu
EIGEN_CI_CROSS_C_COMPILER: powerpc64le-linux-gnu-gcc-10
EIGEN_CI_CROSS_CXX_COMPILER: powerpc64le-linux-gnu-g++-10
# Temporarily disable MMA until #2457 is resolved.
EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_ALTIVEC_DISABLE_MMA=1"
EIGEN_CI_CROSS_INSTALL: g++-14-powerpc64le-linux-gnu qemu-user-static
EIGEN_CI_CROSS_C_COMPILER: powerpc64le-linux-gnu-gcc-14
EIGEN_CI_CROSS_CXX_COMPILER: powerpc64le-linux-gnu-g++-14

build:linux:cross:ppc64le:clang-12:default:
build:linux:cross:ppc64le:clang-16:default:
extends: .build:linux:cross:ppc64le
variables:
EIGEN_CI_INSTALL: clang-12
EIGEN_CI_C_COMPILER: clang-12
EIGEN_CI_CXX_COMPILER: clang++-12
EIGEN_CI_CROSS_INSTALL: g++-10-powerpc64le-linux-gnu clang-12
EIGEN_CI_C_COMPILER: clang-16
EIGEN_CI_CXX_COMPILER: clang++-16
EIGEN_CI_CROSS_INSTALL: g++-14-powerpc64le-linux-gnu clang-16 qemu-user-static

######## loongarch64 #################################################

@ -320,17 +321,13 @@ build:linux:cross:ppc64le:clang-12:default:
variables:
EIGEN_CI_TARGET_ARCH: loongarch64
EIGEN_CI_CROSS_TARGET_TRIPLE: loongarch64-linux-gnu
tags:
- eigen-runner
- linux
- cross-compiler

# GCC-14 (minimum on Ubuntu 24)
build:linux:cross:loongarch64:gcc-14:default:
extends: .build:linux:cross:loongarch64
image: ubuntu:24.04
variables:
EIGEN_CI_CROSS_INSTALL: g++-14-loongarch64-linux-gnu
EIGEN_CI_CROSS_INSTALL: g++-14-loongarch64-linux-gnu qemu-user-static
EIGEN_CI_CROSS_C_COMPILER: loongarch64-linux-gnu-gcc-14
EIGEN_CI_CROSS_CXX_COMPILER: loongarch64-linux-gnu-g++-14
EIGEN_CI_ADDITIONAL_ARGS: >-
@ -9,6 +9,8 @@
- if: $CI_PIPELINE_SOURCE == "schedule" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "web" && $CI_PROJECT_NAMESPACE == "libeigen"
- if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_PROJECT_NAMESPACE == "libeigen" && $CI_MERGE_REQUEST_LABELS =~ "/all-tests/"
tags:
- saas-linux-2xlarge-amd64

##### x86-64 ###################################################################
.test:linux:x86-64:

@ -16,10 +18,6 @@
variables:
EIGEN_CI_TARGET_ARCH: x86_64
EIGEN_CI_CROSS_TARGET_TRIPLE: x86_64-linux-gnu
tags:
- eigen-runner
- linux
- x86-64

# GCC-6 (minimum on Ubuntu 18.04)
.test:linux:x86-64:gcc-6:default:

@ -289,18 +287,13 @@ test:linux:cuda-12.2:clang-12:
variables:
EIGEN_CI_TARGET_ARCH: arm
EIGEN_CI_CROSS_TARGET_TRIPLE: arm-linux-gnueabihf
# Enable cross-compiled arm binary to run on aarch64.
EIGEN_CI_BEFORE_SCRIPT: "ln -s /usr/arm-linux-gnueabihf/lib/ld-linux-armhf.so.3 /lib/ && export LD_LIBRARY_PATH=/usr/arm-linux-gnueabihf/lib/"
tags:
- eigen-runner
- linux
- aarch64
EIGEN_CI_CTEST_ARGS: --timeout 2000

.test:linux:arm:gcc-10:default:
extends: .test:linux:arm
needs: [ build:linux:cross:arm:gcc-10:default ]
variables:
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf qemu-user-static

test:linux:arm:gcc-10:default:official:
extends: .test:linux:arm:gcc-10:default

@ -316,7 +309,7 @@ test:linux:arm:gcc-10:default:unsupported:
extends: .test:linux:arm
needs: [ build:linux:cross:arm:clang-12:default ]
variables:
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf clang-12
EIGEN_CI_CROSS_INSTALL: g++-10-arm-linux-gnueabihf clang-12 qemu-user-static

test:linux:arm:clang-12:default:official:
extends: .test:linux:arm:clang-12:default

@ -336,9 +329,7 @@ test:linux:arm:clang-12:default:unsupported:
EIGEN_CI_TARGET_ARCH: aarch64
EIGEN_CI_CROSS_TARGET_TRIPLE: aarch64-linux-gnu
tags:
- eigen-runner
- linux
- aarch64
- saas-linux-large-arm64

.test:linux:aarch64:gcc-10:default:
extends: .test:linux:aarch64

@ -376,60 +367,54 @@ test:linux:aarch64:clang-12:default:unsupported:

.test:linux:ppc64le:
extends: .test:linux
image: ubuntu:24.04
variables:
EIGEN_CI_TARGET_ARCH: ppc64le
EIGEN_CI_CROSS_TARGET_TRIPLE: powerpc64le-linux-gnu
tags:
- eigen-runner
- linux
- ppc64le
EIGEN_CI_CTEST_ARGS: --timeout 2000

.test:linux:ppc64le:gcc-10:default:
.test:linux:ppc64le:gcc-14:default:
extends: .test:linux:ppc64le
needs: [ build:linux:cross:ppc64le:gcc-10:default ]
needs: [ build:linux:cross:ppc64le:gcc-14:default ]
variables:
EIGEN_CI_INSTALL: g++-10
EIGEN_CI_CROSS_INSTALL: g++-14-powerpc64le-linux-gnu qemu-user-static

test:linux:ppc64le:gcc-10:default:official:
extends: .test:linux:ppc64le:gcc-10:default
test:linux:ppc64le:gcc-14:default:official:
extends: .test:linux:ppc64le:gcc-14:default
variables:
EIGEN_CI_CTEST_LABEL: Official

test:linux:ppc64le:gcc-10:default:unsupported:
extends: .test:linux:ppc64le:gcc-10:default
test:linux:ppc64le:gcc-14:default:unsupported:
extends: .test:linux:ppc64le:gcc-14:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported

.test:linux:ppc64le:clang-12:default:
.test:linux:ppc64le:clang-16:default:
extends: .test:linux:ppc64le
needs: [ build:linux:cross:ppc64le:clang-12:default ]
needs: [ build:linux:cross:ppc64le:clang-16:default ]
variables:
EIGEN_CI_INSTALL: clang-12
EIGEN_CI_CROSS_INSTALL: g++-14-powerpc64le-linux-gnu clang-16 qemu-user-static

test:linux:ppc64le:clang-12:default:official:
extends: .test:linux:ppc64le:clang-12:default
test:linux:ppc64le:clang-16:default:official:
extends: .test:linux:ppc64le:clang-16:default
variables:
EIGEN_CI_CTEST_LABEL: Official

test:linux:ppc64le:clang-12:default:unsupported:
extends: .test:linux:ppc64le:clang-12:default
test:linux:ppc64le:clang-16:default:unsupported:
extends: .test:linux:ppc64le:clang-16:default
variables:
EIGEN_CI_CTEST_LABEL: Unsupported

##### loongarch64 ###################################################################
##### loongarch64 ##############################################################

.test:linux:loongarch64:
extends: .test:linux
image: ubuntu:24.04
variables:
EIGEN_CI_TARGET_ARCH: loongarch64
EIGEN_CI_CROSS_TARGET_TRIPLE: loongarch64-linux-gnu
# Install QEMU and set up the execution environment in the image
EIGEN_CI_CROSS_INSTALL: g++-14-loongarch64-linux-gnu qemu-user-static
EIGEN_CI_CTEST_ARGS: --timeout 2000
tags:
- eigen-runner
- linux
- cross-compiler

# GCC-14 (Ubuntu 24)
.test:linux:loongarch64:gcc-14:default:
@ -16,7 +16,7 @@
#pragma GCC diagnostic ignored "-Wshadow"
#endif

#ifndef EIGEN_TEST_ANNOYING_SCALAR_DONT_THROW
#if defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_TEST_ANNOYING_SCALAR_DONT_THROW)
struct my_exception {
my_exception() {}
~my_exception() {}

@ -76,7 +76,7 @@ class AnnoyingScalar {
}

AnnoyingScalar operator+(const AnnoyingScalar& other) const {
#ifndef EIGEN_TEST_ANNOYING_SCALAR_DONT_THROW
#if defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_TEST_ANNOYING_SCALAR_DONT_THROW)
countdown--;
if (countdown <= 0 && !dont_throw) throw my_exception();
#endif
@ -1340,7 +1340,7 @@ EIGEN_DECLARE_TEST(array_cwise) {
CALL_SUBTEST_3(array_generic(Array44d()));
CALL_SUBTEST_4(array_generic(
ArrayXXcf(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
CALL_SUBTEST_7(array_generic(
CALL_SUBTEST_5(array_generic(
ArrayXXf(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
CALL_SUBTEST_8(array_generic(
ArrayXXi(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
@ -8,7 +8,7 @@
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

// Various sanity tests with exceptions and non trivially copyable scalar type.
// - no memory leak when a custom scalar type trow an exceptions
// - no memory leak when a custom scalar type throws an exception
// - todo: complete the list of tests!

#define EIGEN_STACK_ALLOCATION_LIMIT 100000000

@ -21,9 +20,8 @@
AnnoyingScalar::countdown = 100; \
int before = AnnoyingScalar::instances; \
bool exception_thrown = false; \
try { \
OP; \
} catch (my_exception) { \
EIGEN_TRY { OP; } \
EIGEN_CATCH(my_exception) { \
exception_thrown = true; \
VERIFY(AnnoyingScalar::instances == before && "memory leak detected in " && EIGEN_MAKESTRING(OP)); \
} \

@ -35,7 +34,11 @@ EIGEN_DECLARE_TEST(exceptions) {
typedef Eigen::Matrix<AnnoyingScalar, Dynamic, Dynamic> MatrixType;

{
#if defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_TEST_ANNOYING_SCALAR_DONT_THROW)
AnnoyingScalar::dont_throw = false;
#else
AnnoyingScalar::dont_throw = true;
#endif
int n = 50;
VectorType v0(n), v1(n);
MatrixType m0(n, n), m1(n, n), m2(n, n);
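Replacing the bare try/catch with EIGEN_TRY/EIGEN_CATCH keeps the macro compilable when exceptions are disabled. Roughly, Eigen's macros reduce to the sketch below when EIGEN_EXCEPTIONS is off (simplified; the real definitions live in Eigen/src/Core/util/Macros.h):

#ifdef EIGEN_EXCEPTIONS
#define EIGEN_TRY try
#define EIGEN_CATCH(X) catch (X)
#else
// Without exceptions, the "try" body always runs and the handler never does.
#define EIGEN_TRY if (true)
#define EIGEN_CATCH(X) else
#endif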
@ -78,6 +78,19 @@ void quaternion(void) {
VERIFY(ss.str() == "0i + 0j + 0k + 1");
#endif

// Consistent handling of scalar first/last conventions regardless of Eigen's own coefficient layout
const Scalar w(a);
const Vector3 xyz(v0);
q1 = Quaternionx::FromCoeffsScalarFirst(w, xyz.x(), xyz.y(), xyz.z());
q2 = Quaternionx::FromCoeffsScalarLast(xyz.x(), xyz.y(), xyz.z(), w);
VERIFY_IS_EQUAL(q1, q2);

VERIFY_IS_EQUAL(q1.coeffsScalarFirst()[0], w);
VERIFY_IS_EQUAL(q1.coeffsScalarFirst()(seqN(1, 3)), xyz);

VERIFY_IS_EQUAL(q1.coeffsScalarLast()[3], w);
VERIFY_IS_EQUAL(q1.coeffsScalarLast()(seqN(0, 3)), xyz);

// concatenation
q1 *= q2;
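These checks pin down a classic pitfall: Quaternion's constructor takes (w, x, y, z), but the stored coefficient order of coeffs() is (x, y, z, w). The FromCoeffsScalarFirst/FromCoeffsScalarLast factories and the coeffsScalarFirst()/coeffsScalarLast() views exercised above make the intended convention explicit at the call site. A small sketch of the distinction:

#include <Eigen/Geometry>

int main() {
  // Constructor order is scalar-first: (w, x, y, z).
  Eigen::Quaterniond q(1.0, 0.0, 0.0, 0.0);  // identity rotation
  // Storage order is scalar-last: coeffs() is (x, y, z, w).
  return (q.coeffs()[3] == 1.0) ? 0 : 1;
}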
@ -4,7 +4,7 @@
#include <Eigen/Core>

// Allow gpu** macros for generic tests.
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
#include <Eigen/src/Core/util/GpuHipCudaDefines.inc>

// std::tuple cannot be used on device, and there is a bug in cuda < 9.2 that
// doesn't allow std::tuple to compile for host code either. In these cases,
@ -72,17 +72,16 @@ void test_conversion() {
// NaNs and infinities.
VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number.
VERIFY(!(numext::isnan)(float(half(0.0f))));
VERIFY((numext::isfinite)(float(half(65504.0f))));
VERIFY((numext::isfinite)(float(half(0.0f))));
VERIFY((numext::isinf)(float(half(__half_raw(0xfc00)))));
VERIFY((numext::isnan)(float(half(__half_raw(0xfc01)))));
VERIFY((numext::isinf)(float(half(__half_raw(0x7c00)))));
VERIFY((numext::isnan)(float(half(__half_raw(0x7c01)))));

#if !EIGEN_COMP_MSVC
// Visual Studio errors out on divisions by 0
VERIFY((numext::isnan)(float(half(0.0 / 0.0))));
VERIFY((numext::isinf)(float(half(1.0 / 0.0))));
VERIFY((numext::isinf)(float(half(-1.0 / 0.0))));
#endif
VERIFY((numext::isnan)(float(NumTraits<half>::quiet_NaN())));
VERIFY((numext::isinf)(float(NumTraits<half>::infinity())));
VERIFY((numext::isinf)(float(-NumTraits<half>::infinity())));

// Exactly same checks as above, just directly on the half representation.
VERIFY(!(numext::isinf)(half(__half_raw(0x7bff))));

@ -92,12 +91,9 @@ void test_conversion() {
VERIFY((numext::isinf)(half(__half_raw(0x7c00))));
VERIFY((numext::isnan)(half(__half_raw(0x7c01))));

#if !EIGEN_COMP_MSVC
// Visual Studio errors out on divisions by 0
VERIFY((numext::isnan)(half(0.0 / 0.0)));
VERIFY((numext::isinf)(half(1.0 / 0.0)));
VERIFY((numext::isinf)(half(-1.0 / 0.0)));
#endif
VERIFY((numext::isnan)(NumTraits<half>::quiet_NaN()));
VERIFY((numext::isinf)(NumTraits<half>::infinity()));
VERIFY((numext::isinf)(-NumTraits<half>::infinity()));

// Conversion to bool
VERIFY(!static_cast<bool>(half(0.0)));

@ -204,19 +200,25 @@ void test_comparison() {
VERIFY(half(1.0f) != half(2.0f));

// Comparisons with NaNs and infinities.
#if !EIGEN_COMP_MSVC
// Visual Studio errors out on divisions by 0
VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
VERIFY(!(NumTraits<half>::quiet_NaN() == NumTraits<half>::quiet_NaN()));
VERIFY(NumTraits<half>::quiet_NaN() != NumTraits<half>::quiet_NaN());

VERIFY(!(half(1.0) == half(0.0 / 0.0)));
VERIFY(!(half(1.0) < half(0.0 / 0.0)));
VERIFY(!(half(1.0) > half(0.0 / 0.0)));
VERIFY(half(1.0) != half(0.0 / 0.0));
VERIFY(!(internal::random<half>() == NumTraits<half>::quiet_NaN()));
VERIFY(!(internal::random<half>() < NumTraits<half>::quiet_NaN()));
VERIFY(!(internal::random<half>() > NumTraits<half>::quiet_NaN()));
VERIFY(!(internal::random<half>() <= NumTraits<half>::quiet_NaN()));
VERIFY(!(internal::random<half>() >= NumTraits<half>::quiet_NaN()));
VERIFY(internal::random<half>() != NumTraits<half>::quiet_NaN());

VERIFY(half(1.0) < half(1.0 / 0.0));
VERIFY(half(1.0) > half(-1.0 / 0.0));
#endif
VERIFY(!(NumTraits<half>::quiet_NaN() == internal::random<half>()));
VERIFY(!(NumTraits<half>::quiet_NaN() < internal::random<half>()));
VERIFY(!(NumTraits<half>::quiet_NaN() > internal::random<half>()));
VERIFY(!(NumTraits<half>::quiet_NaN() <= internal::random<half>()));
VERIFY(!(NumTraits<half>::quiet_NaN() >= internal::random<half>()));
VERIFY(NumTraits<half>::quiet_NaN() != internal::random<half>());

VERIFY(internal::random<half>() < NumTraits<half>::infinity());
VERIFY(internal::random<half>() > -NumTraits<half>::infinity());
}
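Note that swapping 0.0/0.0 for NumTraits<half>::quiet_NaN() drops the MSVC-only guard while testing the same IEEE-754 rule: every ordered comparison against NaN is false and only operator!= is true. For reference, a self-contained check:

#include <Eigen/Core>

int main() {
  const Eigen::half nan = Eigen::NumTraits<Eigen::half>::quiet_NaN();
  const Eigen::half one(1.0f);
  // All ordered comparisons with NaN are false; != is the lone true case.
  const bool ok = !(one == nan) && !(one < nan) && !(one > nan) && (one != nan);
  return ok ? 0 : 1;
}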
void test_basic_functions() {
@ -343,7 +343,7 @@ static std::vector<std::string> eigen_assert_list;
#if !defined(EIGEN_TESTING_CONSTEXPR) && !defined(EIGEN_TESTING_PLAINOBJECT_CTOR)
#define EIGEN_INTERNAL_DEBUGGING
#endif
#include <Eigen/QR> // required for createRandomPIMatrixOfRank and generateRandomMatrixSvs
#include <Eigen/Core>

inline void verify_impl(bool condition, const char* testname, const char* file, int line,
const char* condition_as_string) {

@ -935,3 +935,7 @@ int main(int argc, char* argv[]) {
#endif

#include "gpu_test_helper.h"

#ifndef EIGEN_TEST_MAX_SIZE
#define EIGEN_TEST_MAX_SIZE 320
#endif
@ -1,6 +1,8 @@
#include "main.h"

#ifdef EIGEN_EXCEPTIONS
#include <exception> // std::exception
#endif

#include <Eigen/src/Core/util/MaxSizeVector.h>

@ -31,28 +33,27 @@ struct Foo {
std::cout << '~';
--Foo::object_count;
}

#ifdef EIGEN_EXCEPTIONS
class Fail : public std::exception {};
#endif
};

Index Foo::object_count = 0;
Index Foo::object_limit = 0;

EIGEN_DECLARE_TEST(cxx11_maxsizevector) {
EIGEN_DECLARE_TEST(maxsizevector) {
typedef MaxSizeVector<Foo> VectorX;
Foo::object_count = 0;
for (int r = 0; r < g_repeat; r++) {
Index rows = internal::random<Index>(3, 30);
Foo::object_limit = internal::random<Index>(0, rows - 2);
std::cout << "object_limit = " << Foo::object_limit << std::endl;
bool exception_raised = false;
#ifdef EIGEN_EXCEPTIONS
bool exception_raised = false;
try {
#endif
std::cout << "\nVectorX m(" << rows << ");\n";
VectorX vect(rows);
for (int i = 0; i < rows; ++i) vect.push_back(Foo());
#ifdef EIGEN_EXCEPTIONS
VERIFY(false); // not reached if exceptions are enabled
} catch (const Foo::Fail&) {
exception_raised = true;
@ -354,28 +354,28 @@ void packetmath_boolean_mask_ops() {
for (int i = 0; i < size; ++i) {
data1[i] = internal::random<Scalar>();
}
CHECK_CWISE1(internal::ptrue, internal::ptrue);
CHECK_CWISE1_MASK(internal::ptrue, internal::ptrue);
CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
for (int i = 0; i < PacketSize; ++i) {
data1[i] = Scalar(RealScalar(i));
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}

CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);

// Test (-0) == (0) for signed operations
for (int i = 0; i < PacketSize; ++i) {
data1[i] = Scalar(-0.0);
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);

// Test NaN
for (int i = 0; i < PacketSize; ++i) {
data1[i] = NumTraits<Scalar>::quiet_NaN();
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
CHECK_CWISE2_MASK(internal::pcmp_eq, internal::pcmp_eq);
}

template <typename Scalar, typename Packet>

@ -384,28 +384,27 @@ void packetmath_boolean_mask_ops_real() {
const int size = 2 * PacketSize;
EIGEN_ALIGN_MAX Scalar data1[size];
EIGEN_ALIGN_MAX Scalar data2[size];
EIGEN_ALIGN_MAX Scalar ref[size];

for (int i = 0; i < PacketSize; ++i) {
data1[i] = internal::random<Scalar>();
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}

CHECK_CWISE2_IF(true, internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
CHECK_CWISE2_MASK(internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);

// Test (-0) <=/< (0) for signed operations
for (int i = 0; i < PacketSize; ++i) {
data1[i] = Scalar(-0.0);
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_IF(true, internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
CHECK_CWISE2_MASK(internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);

// Test NaN
for (int i = 0; i < PacketSize; ++i) {
data1[i] = NumTraits<Scalar>::quiet_NaN();
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_IF(true, internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
CHECK_CWISE2_MASK(internal::pcmp_lt_or_nan, internal::pcmp_lt_or_nan);
}

template <typename Scalar, typename Packet, typename EnableIf = void>

@ -422,31 +421,30 @@ struct packetmath_boolean_mask_ops_notcomplex_test<
const int size = 2 * PacketSize;
EIGEN_ALIGN_MAX Scalar data1[size];
EIGEN_ALIGN_MAX Scalar data2[size];
EIGEN_ALIGN_MAX Scalar ref[size];

for (int i = 0; i < PacketSize; ++i) {
data1[i] = internal::random<Scalar>();
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}

CHECK_CWISE2_IF(true, internal::pcmp_le, internal::pcmp_le);
CHECK_CWISE2_IF(true, internal::pcmp_lt, internal::pcmp_lt);
CHECK_CWISE2_MASK(internal::pcmp_le, internal::pcmp_le);
CHECK_CWISE2_MASK(internal::pcmp_lt, internal::pcmp_lt);

// Test (-0) <=/< (0) for signed operations
for (int i = 0; i < PacketSize; ++i) {
data1[i] = Scalar(-0.0);
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_IF(true, internal::pcmp_le, internal::pcmp_le);
CHECK_CWISE2_IF(true, internal::pcmp_lt, internal::pcmp_lt);
CHECK_CWISE2_MASK(internal::pcmp_le, internal::pcmp_le);
CHECK_CWISE2_MASK(internal::pcmp_lt, internal::pcmp_lt);

// Test NaN
for (int i = 0; i < PacketSize; ++i) {
data1[i] = NumTraits<Scalar>::quiet_NaN();
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
}
CHECK_CWISE2_IF(true, internal::pcmp_le, internal::pcmp_le);
CHECK_CWISE2_IF(true, internal::pcmp_lt, internal::pcmp_lt);
CHECK_CWISE2_MASK(internal::pcmp_le, internal::pcmp_le);
CHECK_CWISE2_MASK(internal::pcmp_lt, internal::pcmp_lt);
}
};
@ -700,11 +698,12 @@ void packetmath() {
for (int i = 0; i < PacketSize; ++i) {
data1[i] = internal::random<Scalar>(Scalar(0) - limit, limit);
}
} else if (!NumTraits<Scalar>::IsInteger && !NumTraits<Scalar>::IsComplex) {
} else if (!NumTraits<Scalar>::IsInteger && !NumTraits<Scalar>::IsComplex && !std::is_same<Scalar, bool>::value) {
// Prevent very small product results by adjusting range. Otherwise,
// we may end up with multiplying e.g. 32 Eigen::halfs with values < 1.
for (int i = 0; i < PacketSize; ++i) {
data1[i] = internal::random<Scalar>(Scalar(0.5), Scalar(1)) * (internal::random<bool>() ? Scalar(-1) : Scalar(1));
data1[i] = REF_MUL(internal::random<Scalar>(Scalar(0.5), Scalar(1)),
(internal::random<bool>() ? Scalar(-1) : Scalar(1)));
}
}
ref[0] = Scalar(1);
@ -115,6 +115,30 @@ bool areApprox(const Scalar* a, const Scalar* b, int size, const typename NumTra
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
}

#define CHECK_CWISE1_MASK(REFOP, POP) \
{ \
bool ref_mask[PacketSize] = {}; \
bool data_mask[PacketSize] = {}; \
internal::pstore(data2, POP(internal::pload<Packet>(data1))); \
for (int i = 0; i < PacketSize; ++i) { \
ref_mask[i] = numext::is_exactly_zero(REFOP(data1[i])); \
data_mask[i] = numext::is_exactly_zero(data2[i]); \
} \
VERIFY(test::areEqual(ref_mask, data_mask, PacketSize) && #POP); \
}

#define CHECK_CWISE2_MASK(REFOP, POP) \
{ \
bool ref_mask[PacketSize] = {}; \
bool data_mask[PacketSize] = {}; \
internal::pstore(data2, POP(internal::pload<Packet>(data1), internal::pload<Packet>(data1 + PacketSize))); \
for (int i = 0; i < PacketSize; ++i) { \
ref_mask[i] = numext::is_exactly_zero(REFOP(data1[i], data1[i + PacketSize])); \
data_mask[i] = numext::is_exactly_zero(data2[i]); \
} \
VERIFY(test::areEqual(ref_mask, data_mask, PacketSize) && #POP); \
}

// Checks component-wise for input of size N. All of data1, data2, and ref
// should have size at least ceil(N/PacketSize)*PacketSize to avoid memory
// access errors.
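A word on why the MASK variants exist: packet comparisons like pcmp_eq return a per-lane bit mask (all-ones for true), and an all-ones pattern reinterpreted as a float is a NaN, so value-level comparison via areApprox cannot work. The macros above therefore only compare which lanes are zero versus nonzero. The reduction they rely on, in scalar form (illustrative sketch):

// A lane is "false" iff it is exactly zero; any nonzero pattern (including
// the NaN produced by an all-ones float mask) counts as "true".
bool same_mask(const float* a, const float* b, int n) {
  for (int i = 0; i < n; ++i)
    if ((a[i] == 0.0f) != (b[i] == 0.0f)) return false;  // NaN == 0.0f is false
  return true;
}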
@ -57,6 +57,10 @@ void product_selfadjoint(const MatrixType& m) {
v1.tail(rows - 1) * v2.head(cols - 1).adjoint() + v2.head(cols - 1) * v1.tail(rows - 1).adjoint();
VERIFY_IS_APPROX(m2, m3.template triangularView<Lower>().toDenseMatrix());
}

// matrix-vector
m2 = m1.template triangularView<Lower>();
VERIFY_IS_APPROX(m1 * m4, m2.template selfadjointView<Lower>() * m4);
}

EIGEN_DECLARE_TEST(product_selfadjoint) {
@ -37,12 +37,9 @@ void matrixRedux(const MatrixType& m) {
m2.array() = m2.array() - kMaxVal * (m2.array() / kMaxVal);
}

VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1));
VERIFY_IS_APPROX(
MatrixType::Ones(rows, cols).sum(),
Scalar(float(
rows *
cols))); // the float() here to shut up excessive MSVC warning about int->complex conversion being lossy
VERIFY_IS_EQUAL(MatrixType::Zero(rows, cols).sum(), Scalar(0));
Scalar sizeAsScalar = internal::cast<Index, Scalar>(rows * cols);
VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).sum(), sizeAsScalar);
Scalar s(0), p(1), minc(numext::real(m1.coeff(0))), maxc(numext::real(m1.coeff(0)));
for (int j = 0; j < cols; j++)
for (int i = 0; i < rows; i++) {

@ -160,6 +157,10 @@ EIGEN_DECLARE_TEST(redux) {
int maxsize = (std::min)(100, EIGEN_TEST_MAX_SIZE);
TEST_SET_BUT_UNUSED_VARIABLE(maxsize)
for (int i = 0; i < g_repeat; i++) {
int rows = internal::random<int>(1, maxsize);
int cols = internal::random<int>(1, maxsize);
EIGEN_UNUSED_VARIABLE(rows);
EIGEN_UNUSED_VARIABLE(cols);
CALL_SUBTEST_1(matrixRedux(Matrix<float, 1, 1>()));
CALL_SUBTEST_1(matrixRedux(Array<float, 1, 1>()));
CALL_SUBTEST_2(matrixRedux(Matrix2f()));

@ -168,19 +169,37 @@ EIGEN_DECLARE_TEST(redux) {
CALL_SUBTEST_3(matrixRedux(Matrix4d()));
CALL_SUBTEST_3(matrixRedux(Array4d()));
CALL_SUBTEST_3(matrixRedux(Array44d()));
CALL_SUBTEST_4(matrixRedux(MatrixXcf(internal::random<int>(1, maxsize), internal::random<int>(1, maxsize))));
CALL_SUBTEST_4(matrixRedux(ArrayXXcf(internal::random<int>(1, maxsize), internal::random<int>(1, maxsize))));
CALL_SUBTEST_5(matrixRedux(MatrixXd(internal::random<int>(1, maxsize), internal::random<int>(1, maxsize))));
CALL_SUBTEST_5(matrixRedux(ArrayXXd(internal::random<int>(1, maxsize), internal::random<int>(1, maxsize))));
CALL_SUBTEST_6(matrixRedux(MatrixXi(internal::random<int>(1, maxsize), internal::random<int>(1, maxsize))));
CALL_SUBTEST_6(matrixRedux(ArrayXXi(internal::random<int>(1, maxsize), internal::random<int>(1, maxsize))));
CALL_SUBTEST_4(matrixRedux(MatrixXf(rows, cols)));
CALL_SUBTEST_4(matrixRedux(ArrayXXf(rows, cols)));
CALL_SUBTEST_4(matrixRedux(MatrixXd(rows, cols)));
CALL_SUBTEST_4(matrixRedux(ArrayXXd(rows, cols)));
/* TODO: fix test for boolean */
/*CALL_SUBTEST_5(matrixRedux(MatrixX<bool>(rows, cols)));*/
/*CALL_SUBTEST_5(matrixRedux(ArrayXX<bool>(rows, cols)));*/
CALL_SUBTEST_5(matrixRedux(MatrixXi(rows, cols)));
CALL_SUBTEST_5(matrixRedux(ArrayXXi(rows, cols)));
CALL_SUBTEST_5(matrixRedux(MatrixX<int64_t>(rows, cols)));
CALL_SUBTEST_5(matrixRedux(ArrayXX<int64_t>(rows, cols)));
CALL_SUBTEST_6(matrixRedux(MatrixXcf(rows, cols)));
CALL_SUBTEST_6(matrixRedux(ArrayXXcf(rows, cols)));
CALL_SUBTEST_7(matrixRedux(MatrixXcd(rows, cols)));
CALL_SUBTEST_7(matrixRedux(ArrayXXcd(rows, cols)));
}
for (int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_7(vectorRedux(Vector4f()));
CALL_SUBTEST_7(vectorRedux(Array4f()));
CALL_SUBTEST_5(vectorRedux(VectorXd(internal::random<int>(1, maxsize))));
CALL_SUBTEST_5(vectorRedux(ArrayXd(internal::random<int>(1, maxsize))));
CALL_SUBTEST_8(vectorRedux(VectorXf(internal::random<int>(1, maxsize))));
CALL_SUBTEST_8(vectorRedux(ArrayXf(internal::random<int>(1, maxsize))));
int size = internal::random<int>(1, maxsize);
EIGEN_UNUSED_VARIABLE(size);
CALL_SUBTEST_8(vectorRedux(Vector4f()));
CALL_SUBTEST_8(vectorRedux(Array4f()));
CALL_SUBTEST_9(vectorRedux(VectorXf(size)));
CALL_SUBTEST_9(vectorRedux(ArrayXf(size)));
CALL_SUBTEST_10(vectorRedux(VectorXd(size)));
CALL_SUBTEST_10(vectorRedux(ArrayXd(size)));
/* TODO: fix test for boolean */
/*CALL_SUBTEST_10(vectorRedux(VectorX<bool>(size)));*/
/*CALL_SUBTEST_10(vectorRedux(ArrayX<bool>(size)));*/
CALL_SUBTEST_10(vectorRedux(VectorXi(size)));
CALL_SUBTEST_10(vectorRedux(ArrayXi(size)));
CALL_SUBTEST_10(vectorRedux(VectorX<int64_t>(size)));
CALL_SUBTEST_10(vectorRedux(ArrayX<int64_t>(size)));
}
}
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "main.h"
|
||||
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
#define VERIFY_THROWS_BADALLOC(a) \
|
||||
{ \
|
||||
bool threw = false; \
|
||||
@ -19,6 +20,10 @@
|
||||
} \
|
||||
VERIFY(threw && "should have thrown bad_alloc: " #a); \
|
||||
}
|
||||
#else
|
||||
// No way to catch a bad alloc - program terminates.
|
||||
#define VERIFY_THROWS_BADALLOC(a)
|
||||
#endif
|
||||
|
||||
template <typename MatrixType>
|
||||
void triggerMatrixBadAlloc(Index rows, Index cols) {
|
||||
|
@ -381,6 +381,7 @@ void svd_verify_assert_full_only(const MatrixType& input = MatrixType()) {

typedef Matrix<typename MatrixType::Scalar, RowsAtCompileTime, 1> RhsType;
RhsType rhs = RhsType::Zero(input.rows());
EIGEN_UNUSED_VARIABLE(rhs); // Only used if asserts are enabled.
MatrixType m(input.rows(), input.cols());
svd_fill_random(m);

@ -410,6 +411,7 @@ void svd_verify_assert(const MatrixType& input = MatrixType()) {
enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime };
typedef Matrix<typename MatrixType::Scalar, RowsAtCompileTime, 1> RhsType;
RhsType rhs = RhsType::Zero(input.rows());
EIGEN_UNUSED_VARIABLE(rhs); // Only used if asserts are enabled.
MatrixType m(input.rows(), input.cols());
svd_fill_random(m);
@ -214,6 +214,17 @@ void vectorwiseop_matrix(const MatrixType& m) {
VERIFY_IS_EQUAL(m1.real().middleCols(0, fix<0>).colwise().maxCoeff().eval().cols(), 0);
}

void vectorwiseop_mixedscalar() {
Matrix4cd a = Matrix4cd::Random();
Vector4cd b = Vector4cd::Random();
b.imag().setZero();
Vector4d b_real = b.real();

Matrix4cd c = a.array().rowwise() * b.array().transpose();
Matrix4cd d = a.array().rowwise() * b_real.array().transpose();
VERIFY_IS_CWISE_EQUAL(c, d);
}

EIGEN_DECLARE_TEST(vectorwiseop) {
CALL_SUBTEST_1(vectorwiseop_array(Array22cd()));
CALL_SUBTEST_2(vectorwiseop_array(Array<double, 3, 2>()));

@ -226,4 +237,5 @@ EIGEN_DECLARE_TEST(vectorwiseop) {
MatrixXd(internal::random<int>(1, EIGEN_TEST_MAX_SIZE), internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
CALL_SUBTEST_7(vectorwiseop_matrix(VectorXd(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
CALL_SUBTEST_7(vectorwiseop_matrix(RowVectorXd(internal::random<int>(1, EIGEN_TEST_MAX_SIZE))));
CALL_SUBTEST_8(vectorwiseop_mixedscalar());
}
@ -10,19 +10,11 @@
#include "main.h"

template <typename MatrixType>
void matrixVisitor(const MatrixType& p) {
void matrixVisitor_impl(MatrixType& m) {
typedef typename MatrixType::Scalar Scalar;

Index rows = p.rows();
Index cols = p.cols();

// construct a random matrix where all coefficients are different
MatrixType m;
m = MatrixType::Random(rows, cols);
for (Index i = 0; i < m.size(); i++)
for (Index i2 = 0; i2 < i; i2++)
while (numext::equal_strict(m(i), m(i2))) // yes, strict equality
m(i) = internal::random<Scalar>();
Index rows = m.rows();
Index cols = m.cols();

Scalar minc = Scalar(1000), maxc = Scalar(-1000);
Index minrow = 0, mincol = 0, maxrow = 0, maxcol = 0;

@ -119,6 +111,22 @@ void matrixVisitor(const MatrixType& p) {
VERIFY((numext::isnan)(eigen_maxc));
}
}
template <typename MatrixType>
void matrixVisitor(const MatrixType& p) {
MatrixType m(p.rows(), p.cols());
// construct a random matrix where all coefficients are different
m.setRandom();
for (Index i = 0; i < m.size(); i++)
for (Index i2 = 0; i2 < i; i2++)
while (numext::equal_strict(m(i), m(i2))) // yes, strict equality
m(i) = internal::random<typename DenseBase<MatrixType>::Scalar>();
MatrixType n = m;
matrixVisitor_impl(m);
// force outer-inner access pattern
using BlockType = Block<MatrixType, Dynamic, Dynamic>;
BlockType m_block = n.block(0, 0, n.rows(), n.cols());
matrixVisitor_impl(m_block);
}

template <typename VectorType>
void vectorVisitor(const VectorType& w) {
@ -24,6 +24,8 @@ void zeroReduction(const MatrixType& m) {
VERIFY_RAISES_ASSERT(m.minCoeff());
VERIFY_RAISES_ASSERT(m.maxCoeff());
Index i, j;
EIGEN_UNUSED_VARIABLE(i); // Only used if exceptions are enabled.
EIGEN_UNUSED_VARIABLE(j);
VERIFY_RAISES_ASSERT(m.minCoeff(&i, &j));
VERIFY_RAISES_ASSERT(m.maxCoeff(&i, &j));
VERIFY_RAISES_ASSERT(m.reshaped().minCoeff(&i));
@ -45,7 +45,7 @@
#include <thread>

#if defined(EIGEN_USE_THREADS) || defined(EIGEN_USE_SYCL)
#include "ThreadPool"
#include "../../../Eigen/ThreadPool"
#endif

#ifdef EIGEN_USE_GPU

File diff suppressed because it is too large
@ -10,14 +10,11 @@
#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_GPU_H)
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_GPU_H

// This header file container defines fo gpu* macros which will resolve to
// their equivalent hip* or cuda* versions depending on the compiler in use
// A separate header (included at the end of this file) will undefine all
#include "TensorGpuHipCudaDefines.h"

// IWYU pragma: private
#include "./InternalHeaderCheck.h"

#include "../../../../../Eigen/src/Core/util/GpuHipCudaDefines.inc"

namespace Eigen {

static const int kGpuScratchSize = 1024;

@ -390,6 +387,6 @@ static EIGEN_DEVICE_FUNC inline void setGpuSharedMemConfig(gpuSharedMemConfig co
} // end namespace Eigen

// undefine all the gpu* macros we defined at the beginning of the file
#include "TensorGpuHipCudaUndefines.h"
#include "../../../../../Eigen/src/Core/util/GpuHipCudaUndefines.inc"

#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_GPU_H
@ -37,12 +37,13 @@
* - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size.
* - MKL (https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html) : fastest, free -- may be
* incompatible with Eigen in GPL form.
* - pocketfft (https://gitlab.mpcdf.mpg.de/mtr/pocketfft) : faster than kissfft, BSD 3-clause.
* - PocketFFT/DUCC (https://gitlab.mpcdf.mpg.de/mtr/pocketfft, https://gitlab.mpcdf.mpg.de/mtr/ducc) : faster than kissfft, BSD 3-clause.
* It is a heavily modified implementation of FFTPack, with the following advantages:
* 1.strictly C++11 compliant
* 2.more accurate twiddle factor computation
* 3.very fast plan generation
* 4.worst case complexity for transform sizes with large prime factors is N*log(N), because Bluestein's algorithm is
* According to the author, DUCC contains the "evolution" of pocketfft, though the interface is very similar.
* used for these cases
*
* \section FFTDesign Design

@ -85,7 +86,7 @@
#ifdef EIGEN_FFTW_DEFAULT
// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size
#include <fftw3.h>
#include "src/FFT/ei_fftw_impl.h"
#include "src/FFT/fftw_impl.h"
namespace Eigen {
// template <typename T> typedef struct internal::fftw_impl default_fft_impl; this does not work
template <typename T>

@ -93,7 +94,7 @@ struct default_fft_impl : public internal::fftw_impl<T> {};
} // namespace Eigen
#elif defined EIGEN_MKL_DEFAULT
// intel Math Kernel Library: fastest, free -- may be incompatible with Eigen in GPL form
#include "src/FFT/ei_imklfft_impl.h"
#include "src/FFT/imklfft_impl.h"
namespace Eigen {
template <typename T>
struct default_fft_impl : public internal::imklfft::imklfft_impl<T> {};

@ -101,14 +102,24 @@ struct default_fft_impl : public internal::imklfft::imklfft_impl<T> {};
#elif defined EIGEN_POCKETFFT_DEFAULT
// internal::pocketfft_impl: a heavily modified implementation of FFTPack, with many advantages.
#include <pocketfft_hdronly.h>
#include "src/FFT/ei_pocketfft_impl.h"
#include "src/FFT/pocketfft_impl.h"
namespace Eigen {
template <typename T>
struct default_fft_impl : public internal::pocketfft_impl<T> {};
} // namespace Eigen
#elif defined EIGEN_DUCCFFT_DEFAULT
#include <ducc0/fft/fft.h>
#include <ducc0/infra/string_utils.h>
#include <ducc0/fft/fft.h>
#include <ducc0/fft/fftnd_impl.h>
#include "src/FFT/duccfft_impl.h"
namespace Eigen {
template <typename T>
struct default_fft_impl : public internal::duccfft_impl<T> {};
} // namespace Eigen
#else
// internal::kissfft_impl: small, free, reasonably efficient default, derived from kissfft
#include "src/FFT/ei_kissfft_impl.h"
#include "src/FFT/kissfft_impl.h"
namespace Eigen {
template <typename T>
struct default_fft_impl : public internal::kissfft_impl<T> {};
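For reference, backend selection above is driven entirely by which EIGEN_*_DEFAULT macro is defined before including the header; the new branch makes EIGEN_DUCCFFT_DEFAULT select DUCC, and kissfft remains the fallback. A minimal usage sketch:

// #define EIGEN_DUCCFFT_DEFAULT  // opt into the DUCC backend (requires ducc0)
#include <unsupported/Eigen/FFT>
#include <complex>
#include <vector>

int main() {
  Eigen::FFT<float> fft;
  std::vector<float> time(8, 1.0f);
  std::vector<std::complex<float> > freq;
  fft.fwd(freq, time);  // real-to-complex forward transform
  std::vector<float> back;
  fft.inv(back, freq);  // inverse is rescaled so that back approximates time
  return 0;
}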
@ -204,7 +215,8 @@ class FFT {

inline void fwd(Complex* dst, const Complex* src, Index nfft) { m_impl.fwd(dst, src, static_cast<int>(nfft)); }

#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT || \
defined EIGEN_MKL_DEFAULT
inline void fwd2(Complex* dst, const Complex* src, int n0, int n1) { m_impl.fwd2(dst, src, n0, n1); }
#endif

@ -366,7 +378,8 @@ class FFT {
inv(&dst[0], &src[0], nfft);
}

#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT || \
defined EIGEN_MKL_DEFAULT
inline void inv2(Complex* dst, const Complex* src, int n0, int n1) {
m_impl.inv2(dst, src, n0, n1);
if (HasFlag(Unscaled) == false) scale(dst, 1. / (n0 * n1), n0 * n1);

@ -385,7 +398,6 @@ class FFT {
Matrix<T_Data, Dynamic, 1>::Map(x, nx) *= s;
else
Matrix<T_Data, Dynamic, 1>::MapAligned(x, nx) *= s;
// Matrix<T_Data, Dynamic, Dynamic>::Map(x,nx) * s;
#endif
}
71
unsupported/Eigen/src/FFT/duccfft_impl.h
Normal file
71
unsupported/Eigen/src/FFT/duccfft_impl.h
Normal file
@ -0,0 +1,71 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

namespace Eigen {

namespace internal {

template <typename _Scalar>
struct duccfft_impl {
  using Scalar = _Scalar;
  using Complex = std::complex<Scalar>;
  using shape_t = ducc0::fmav_info::shape_t;
  using stride_t = ducc0::fmav_info::stride_t;

  inline void clear() {}

  inline void fwd(Complex* dst, const Scalar* src, int nfft) {
    const shape_t axes{0};
    ducc0::cfmav<Scalar> m_in(src, shape_t{static_cast<size_t>(nfft)});
    ducc0::vfmav<Complex> m_out(dst, shape_t{static_cast<size_t>(nfft) / 2 + 1});
    ducc0::r2c(m_in, m_out, axes, /*forward=*/true, /*scale=*/static_cast<Scalar>(1));
  }

  inline void fwd(Complex* dst, const Complex* src, int nfft) {
    const shape_t axes{0};
    ducc0::cfmav<Complex> m_in(src, shape_t{static_cast<size_t>(nfft)});
    ducc0::vfmav<Complex> m_out(dst, shape_t{static_cast<size_t>(nfft)});
    ducc0::c2c(m_in, m_out, axes, /*forward=*/true, /*scale=*/static_cast<Scalar>(1));
  }

  inline void inv(Scalar* dst, const Complex* src, int nfft) {
    const shape_t axes{0};
    ducc0::cfmav<Complex> m_in(src, shape_t{static_cast<size_t>(nfft) / 2 + 1});
    ducc0::vfmav<Scalar> m_out(dst, shape_t{static_cast<size_t>(nfft)});
    ducc0::c2r(m_in, m_out, axes, /*forward=*/false, /*scale=*/static_cast<Scalar>(1));
  }

  inline void inv(Complex* dst, const Complex* src, int nfft) {
    const shape_t axes{0};
    ducc0::cfmav<Complex> m_in(src, shape_t{static_cast<size_t>(nfft)});
    ducc0::vfmav<Complex> m_out(dst, shape_t{static_cast<size_t>(nfft)});
    ducc0::c2c(m_in, m_out, axes, /*forward=*/false, /*scale=*/static_cast<Scalar>(1));
  }

  inline void fwd2(Complex* dst, const Complex* src, int nfft0, int nfft1) {
    const shape_t axes{0, 1};
    const shape_t in_shape{static_cast<size_t>(nfft0), static_cast<size_t>(nfft1)};
    const shape_t out_shape{static_cast<size_t>(nfft0), static_cast<size_t>(nfft1)};
    const stride_t stride{static_cast<ptrdiff_t>(nfft1), static_cast<ptrdiff_t>(1)};
    ducc0::cfmav<Complex> m_in(src, in_shape, stride);
    ducc0::vfmav<Complex> m_out(dst, out_shape, stride);
    ducc0::c2c(m_in, m_out, axes, /*forward=*/true, /*scale=*/static_cast<Scalar>(1));
  }

  inline void inv2(Complex* dst, const Complex* src, int nfft0, int nfft1) {
    const shape_t axes{0, 1};
    const shape_t in_shape{static_cast<size_t>(nfft0), static_cast<size_t>(nfft1)};
    const shape_t out_shape{static_cast<size_t>(nfft0), static_cast<size_t>(nfft1)};
    const stride_t stride{static_cast<ptrdiff_t>(nfft1), static_cast<ptrdiff_t>(1)};
    ducc0::cfmav<Complex> m_in(src, in_shape, stride);
    ducc0::vfmav<Complex> m_out(dst, out_shape, stride);
    ducc0::c2c(m_in, m_out, axes, /*forward=*/false, /*scale=*/static_cast<Scalar>(1));
  }
};

}  // namespace internal
}  // namespace Eigen
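The whole backend is just these thin wrappers over ducc0's r2c/c2r/c2c kernels; selection happens at compile time via EIGEN_DUCCFFT_DEFAULT, as the new test at the end of this commit shows. A standalone usage sketch, assuming ducc0's src/ directory is on the include path (the CMake logic later in this diff locates it via DUCC_ROOT) and the translation unit is built as C++17 with DUCC0_NO_THREADING=1 or linked against ducc's infra sources:

// Sketch: 1-D FFT through the new ducc backend (illustrative, not part of the patch).
#define EIGEN_DUCCFFT_DEFAULT 1
#include <ducc0/fft/fft.h>         // ducc0 headers must precede Eigen's FFT header
#include <ducc0/fft/fftnd_impl.h>
#include <unsupported/Eigen/FFT>
#include <complex>
#include <vector>

int main() {
  Eigen::FFT<float> fft;
  std::vector<float> time(64, 1.0f);            // constant input signal
  std::vector<std::complex<float> > freq;
  fft.fwd(freq, time);                          // real-to-complex forward transform
  std::vector<float> back;
  fft.inv(back, freq);                          // round trip, scaled by 1/n internally
  return 0;
}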
@ -5,17 +5,16 @@
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
using namespace pocketfft;
|
||||
using namespace pocketfft::detail;
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <typename _Scalar>
|
||||
struct pocketfft_impl {
|
||||
typedef _Scalar Scalar;
|
||||
typedef std::complex<Scalar> Complex;
|
||||
using Scalar = _Scalar;
|
||||
using Complex = std::complex<Scalar>;
|
||||
using shape_t = pocketfft::shape_t;
|
||||
using stride_t = pocketfft::stride_t;
|
||||
|
||||
inline void clear() {}
|
||||
|
||||
@@ -24,14 +23,14 @@ struct pocketfft_impl {
    const shape_t axes_{0};
    const stride_t stride_in{sizeof(Scalar)};
    const stride_t stride_out{sizeof(Complex)};
    r2c(shape_, stride_in, stride_out, axes_, FORWARD, src, dst, static_cast<Scalar>(1));
    pocketfft::r2c(shape_, stride_in, stride_out, axes_, pocketfft::FORWARD, src, dst, static_cast<Scalar>(1));
  }

  inline void fwd(Complex* dst, const Complex* src, int nfft) {
    const shape_t shape_{static_cast<size_t>(nfft)};
    const shape_t axes_{0};
    const stride_t stride_{sizeof(Complex)};
    c2c(shape_, stride_, stride_, axes_, FORWARD, src, dst, static_cast<Scalar>(1));
    pocketfft::c2c(shape_, stride_, stride_, axes_, pocketfft::FORWARD, src, dst, static_cast<Scalar>(1));
  }

  inline void inv(Scalar* dst, const Complex* src, int nfft) {
@@ -39,28 +38,28 @@ struct pocketfft_impl {
    const shape_t axes_{0};
    const stride_t stride_in{sizeof(Complex)};
    const stride_t stride_out{sizeof(Scalar)};
    c2r(shape_, stride_in, stride_out, axes_, BACKWARD, src, dst, static_cast<Scalar>(1));
    pocketfft::c2r(shape_, stride_in, stride_out, axes_, pocketfft::BACKWARD, src, dst, static_cast<Scalar>(1));
  }

  inline void inv(Complex* dst, const Complex* src, int nfft) {
    const shape_t shape_{static_cast<size_t>(nfft)};
    const shape_t axes_{0};
    const stride_t stride_{sizeof(Complex)};
    c2c(shape_, stride_, stride_, axes_, BACKWARD, src, dst, static_cast<Scalar>(1));
    pocketfft::c2c(shape_, stride_, stride_, axes_, pocketfft::BACKWARD, src, dst, static_cast<Scalar>(1));
  }

  inline void fwd2(Complex* dst, const Complex* src, int nfft0, int nfft1) {
    const shape_t shape_{static_cast<size_t>(nfft0), static_cast<size_t>(nfft1)};
    const shape_t axes_{0, 1};
    const stride_t stride_{static_cast<ptrdiff_t>(sizeof(Complex) * nfft1), static_cast<ptrdiff_t>(sizeof(Complex))};
    c2c(shape_, stride_, stride_, axes_, FORWARD, src, dst, static_cast<Scalar>(1));
    pocketfft::c2c(shape_, stride_, stride_, axes_, pocketfft::FORWARD, src, dst, static_cast<Scalar>(1));
  }

  inline void inv2(Complex* dst, const Complex* src, int nfft0, int nfft1) {
    const shape_t shape_{static_cast<size_t>(nfft0), static_cast<size_t>(nfft1)};
    const shape_t axes_{0, 1};
    const stride_t stride_{static_cast<ptrdiff_t>(sizeof(Complex) * nfft1), static_cast<ptrdiff_t>(sizeof(Complex))};
    c2c(shape_, stride_, stride_, axes_, BACKWARD, src, dst, static_cast<Scalar>(1));
    pocketfft::c2c(shape_, stride_, stride_, axes_, pocketfft::BACKWARD, src, dst, static_cast<Scalar>(1));
  }
};
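The pattern running through this file: the header-scope `using namespace pocketfft;` and `using namespace pocketfft::detail;` directives are dropped, and every call and constant is qualified explicitly (pocketfft::c2c, pocketfft::FORWARD, and so on). A using-directive at namespace scope in a header leaks names into every translation unit that includes it, which makes unqualified calls ambiguous as soon as a second FFT library becomes visible, as the new ducc backend now can be. A generic illustration of the failure mode (the library names here are hypothetical):

// Sketch: why header-scope using-directives and multiple backends clash.
#include <cstdio>

namespace fft_a { inline void c2c() { std::puts("fft_a::c2c"); } }
namespace fft_b { inline void c2c() { std::puts("fft_b::c2c"); } }

using namespace fft_a;  // what the removed header-scope directive amounted to
using namespace fft_b;  // a second backend entering the same scope

int main() {
  // c2c();            // error: ambiguous between fft_a::c2c and fft_b::c2c
  fft_a::c2c();        // explicit qualification, as the patch now does
  return 0;
}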
@@ -284,12 +284,13 @@ template <typename MatrixType>
struct matrix_exp_computeUV<MatrixType, long double> {
  template <typename ArgType>
  static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings) {
    using Scalar = typename traits<MatrixType>::Scalar;
#if LDBL_MANT_DIG == 53  // double precision
    matrix_exp_computeUV<MatrixType, double>::run(arg, U, V, squarings);

#else

    using Scalar = typename traits<MatrixType>::Scalar;

    using std::frexp;
    using std::pow;
    const long double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
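The `using Scalar` alias moves into the #else branch: when LDBL_MANT_DIG == 53 the function immediately delegates to the double specialization and never touches the alias (on MSVC, for instance, long double is the same 53-bit-mantissa type as double), so declaring it before the #if would presumably only earn an unused-alias warning. A standalone check of which branch a given toolchain takes, illustrative only:

// Sketch: probing which matrix_exp_computeUV branch this toolchain compiles.
#include <cfloat>
#include <cstdio>

int main() {
  std::printf("LDBL_MANT_DIG = %d\n", LDBL_MANT_DIG);
#if LDBL_MANT_DIG == 53
  // long double has the same mantissa width as double here,
  // so the long double specialization reuses the double path.
  std::puts("double-precision long double: double path is reused");
#else
  std::puts("extended-precision long double: dedicated Pade tables are used");
#endif
  return 0;
}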
@@ -1455,7 +1455,7 @@ struct zeta_impl {

    if (q <= zero) {
      if (q == numext::floor(q)) {
        if (x == numext::floor(x) && long(x) % 2 == 0) {
        if (numext::rint(Scalar(0.5) * x) == Scalar(0.5) * x) {
          return maxnum;
        } else {
          return nan;
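The replaced condition tested for an even integer via `long(x) % 2 == 0`, which is undefined behaviour once x lies outside the range of long; the new form asks whether x/2 is an integer, which holds exactly for even integers and never overflows. A standalone look at the new predicate (illustrative only):

// Sketch: the overflow-safe even-integer test used by the patch.
#include <cmath>
#include <cstdio>

// x is an even integer iff x/2 is an integer.
static bool is_even_integer(double x) { return std::rint(0.5 * x) == 0.5 * x; }

int main() {
  // The old test `x == std::floor(x) && long(x) % 2 == 0` is undefined
  // behaviour once x exceeds the range of long (e.g. x = 1e19).
  for (double x : {4.0, 5.0, 6.5, 1e19}) {
    std::printf("x=%-6g even integer: %d\n", x, int(is_even_integer(x)));
  }
  return 0;
}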
@@ -88,6 +88,25 @@ else()
  ei_add_property(EIGEN_MISSING_BACKENDS "pocketfft, ")
endif()

if( NOT DUCC_ROOT AND ENV{DUCC_ROOT} )
  set( DUCC_ROOT $ENV{DUCC_ROOT} )
endif()
find_path(DUCCFFT
  NAMES "src/ducc0/fft/fft.h"
  PATHS ${DUCC_ROOT})
message(INFO " ${DUCC_ROOT} ${DUCCFFT}")
if(DUCCFFT)
  ei_add_property(EIGEN_TESTED_BACKENDS "duccfft, ")
  include_directories( "${DUCCFFT}/src" )
  add_library(ducc_lib "${DUCCFFT}/src/ducc0/infra/string_utils.cc" "${DUCCFFT}/src/ducc0/infra/threading.cc")
  target_compile_definitions(ducc_lib PUBLIC "DUCC0_NO_THREADING=1")
  ei_add_test(duccfft "-DEIGEN_DUCCFFT_DEFAULT -DDUCC0_NO_THREADING=1" "ducc_lib" )
  set_target_properties(ducc_lib duccfft PROPERTIES CXX_STANDARD 17)
else()
  ei_add_property(EIGEN_MISSING_BACKENDS "duccfft, ")
endif()

option(EIGEN_TEST_OPENGL "Enable OpenGL support in unit tests" OFF)
if(EIGEN_TEST_OPENGL)
  find_package(OpenGL)
@@ -14,8 +14,6 @@
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>

using Eigen::Tensor;

template <int Layout>
@@ -17,8 +17,6 @@
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>

using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair;
@@ -17,8 +17,6 @@
#include "OffByOneScalar.h"
#include <unsupported/Eigen/CXX11/Tensor>

#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>

using Eigen::RowMajor;
using Eigen::Tensor;
@@ -15,8 +15,6 @@
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>

using Eigen::Tensor;

void test_gpu_nullary() {
@@ -16,8 +16,6 @@
#include "main.h"
#include <Eigen/CXX11/Tensor>

#include <Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>

void test_gpu_random_uniform() {
  Tensor<float, 2> out(72, 97);
  out.setZero();
@@ -16,8 +16,6 @@
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

#include <Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>

using Eigen::Tensor;
typedef Tensor<float, 1>::DimensionPair DimPair;
4
unsupported/test/duccfft.cpp
Normal file
@@ -0,0 +1,4 @@
#define EIGEN_DUCCFFT_DEFAULT 1
#include <ducc0/fft/fft.h>         // Needs to be included before main.h
#include <ducc0/fft/fftnd_impl.h>  // Same requirement
#include "fft_test_shared.h"
@@ -272,7 +272,7 @@ EIGEN_DECLARE_TEST(FFTW) {
  CALL_SUBTEST(test_scalar<float>(2 * 3 * 4 * 5 * 7));
  CALL_SUBTEST(test_scalar<double>(2 * 3 * 4 * 5 * 7));

#if defined EIGEN_HAS_FFTWL || defined EIGEN_POCKETFFT_DEFAULT
#if defined EIGEN_HAS_FFTWL || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT
  CALL_SUBTEST(test_complex<long double>(32));
  CALL_SUBTEST(test_complex<long double>(256));
  CALL_SUBTEST(test_complex<long double>(3 * 8));
@@ -294,13 +294,15 @@ EIGEN_DECLARE_TEST(FFTW) {
  // fails to build since Eigen limits the stack allocation size; too big here.
  // CALL_SUBTEST( ( test_complex2d<long double, 256, 256> () ) );
#endif
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT || \
    defined EIGEN_MKL_DEFAULT
  CALL_SUBTEST((test_complex2d<float, 24, 24>()));
  CALL_SUBTEST((test_complex2d<float, 60, 60>()));
  CALL_SUBTEST((test_complex2d<float, 24, 60>()));
  CALL_SUBTEST((test_complex2d<float, 60, 24>()));
#endif
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT || \
    defined EIGEN_MKL_DEFAULT
  CALL_SUBTEST((test_complex2d<double, 24, 24>()));
  CALL_SUBTEST((test_complex2d<double, 60, 60>()));
  CALL_SUBTEST((test_complex2d<double, 24, 60>()));