Mirror of https://gitlab.com/libeigen/eigen.git
Improved the portability of the tensor code
commit 09653e1f82
parent fae0493f98
@@ -32,25 +32,25 @@ class TensorOpCost {
   // model based on minimal reciprocal throughput numbers from Intel or
   // Agner Fog's tables would be better than what is there now.
   template <typename ArgType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int MulCost() {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() {
     return internal::functor_traits<
         internal::scalar_product_op<ArgType, ArgType> >::Cost;
   }
   template <typename ArgType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int AddCost() {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() {
     return internal::functor_traits<internal::scalar_sum_op<ArgType> >::Cost;
   }
   template <typename ArgType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int DivCost() {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() {
     return internal::functor_traits<
         internal::scalar_quotient_op<ArgType, ArgType> >::Cost;
   }
   template <typename ArgType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int ModCost() {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() {
     return internal::functor_traits<internal::scalar_mod_op<ArgType> >::Cost;
   }
   template <typename SrcType, typename TargetType>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int CastCost() {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() {
     return internal::functor_traits<
         internal::scalar_cast_op<SrcType, TargetType> >::Cost;
   }
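Every change in this hunk is the same one-token move: `static` now precedes the EIGEN_DEVICE_FUNC and EIGEN_STRONG_INLINE qualifiers instead of following them. The commit message only says "portability", but a plausible reading is that a storage-class specifier placed anywhere but first in the declaration draws warnings or outright errors from some compilers, notably under CUDA, where EIGEN_DEVICE_FUNC expands to `__host__ __device__`. A minimal sketch of the pattern, using hypothetical stand-in macros rather than Eigen's real ones:

    // Hypothetical stand-ins for Eigen's qualifier macros; under CUDA,
    // EIGEN_DEVICE_FUNC expands to __host__ __device__ and
    // EIGEN_STRONG_INLINE to a forced-inline attribute.
    #define MY_DEVICE_FUNC
    #define MY_STRONG_INLINE inline

    class TensorOpCostSketch {
     public:
      // Old order: some compilers and analyzers complain that 'static'
      // is not at the beginning of the declaration, and certain CUDA
      // toolchains are stricter still.
      // MY_DEVICE_FUNC MY_STRONG_INLINE static int MulCost() { return 1; }

      // New order: the storage-class specifier leads, which every
      // conforming compiler accepts.
      static MY_DEVICE_FUNC MY_STRONG_INLINE int MulCost() { return 1; }
    };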
@@ -122,7 +122,7 @@ struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
 template <int DimIndex, typename Self, typename Op>
 struct GenericDimReducer {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) {
-    EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
     for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
       const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
       GenericDimReducer<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
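The added parentheses around the assertion condition are more than style, and the same fix recurs in the hunks below. A classic hazard with macro-based static asserts, and a plausible motivation here, is a pre-C++11 fallback that pastes the condition into a template argument list: there, a bare `>` is parsed as the closing angle bracket. A sketch of the failure mode, with a hypothetical macro standing in for Eigen's fallback:

    // Simplified model of a C++03-era static assert: only the 'true'
    // specialization exists, so a false condition fails to compile.
    template <bool Condition> struct static_assertion;
    template <> struct static_assertion<true> { enum { Ok = 1 }; };

    // Hypothetical macro that pastes the condition into a template
    // argument list, as older static-assert fallbacks commonly did.
    #define MY_STATIC_ASSERT(COND) \
      enum { AssertCheck = static_assertion<COND>::Ok }

    template <int DimIndex>
    struct ReducerSketch {
      // MY_STATIC_ASSERT(DimIndex > 0);   // parse error: the '>' in the
      //                                   // condition closes the '<' early
      MY_STATIC_ASSERT((DimIndex > 0));    // parentheses keep '>' inside
    };

    // ReducerSketch<1> ok;   // compiles
    // ReducerSketch<0> bad;  // fails: static_assertion<false> is incomplete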
@@ -183,7 +183,7 @@ struct InnerMostDimPreserver {
 template <int DimIndex, typename Self, typename Op>
 struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
-    EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
     for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
       const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
       InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
@@ -404,7 +404,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
       : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
   {
-    EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE);
     EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
                         YOU_MADE_A_PROGRAMMING_MISTAKE);
 
@@ -566,7 +566,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
   template<int LoadMode>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
   {
-    EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
     eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
 
     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
@@ -73,22 +73,22 @@ struct TensorUInt128
 
 
 template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
 {
   return (lhs.high == rhs.high) & (lhs.low == rhs.low);
 }
 
 template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
 {
   return (lhs.high != rhs.high) | (lhs.low != rhs.low);
 }
 
 template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
 {
   if (lhs.high != rhs.high) {
     return lhs.high > rhs.high;
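Note that operator== and operator!= combine the two word comparisons with bitwise `&` and `|` rather than the short-circuiting `&&` and `||`. Both words must be inspected anyway, and the bitwise forms avoid a branch, which is generally preferable in device code where these operators also run. A standalone illustration of the equivalence, on a hypothetical two-word type rather than Eigen's:

    #include <cassert>
    #include <cstdint>

    // Minimal two-word unsigned integer, mirroring the high/low layout
    // used above (illustration only, not Eigen's type).
    struct U128 {
      uint64_t high;
      uint64_t low;
    };

    // Branch-free equality: both comparisons are evaluated and their
    // bool results combined with bitwise AND, so no short-circuit branch.
    bool equal(const U128& a, const U128& b) {
      return (a.high == b.high) & (a.low == b.low);
    }

    int main() {
      U128 a = {1, 2}, b = {1, 2}, c = {1, 3};
      assert(equal(a, b));
      assert(!equal(a, c));
      return 0;
    }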
@@ -97,8 +97,8 @@ static bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<H
 }
 
 template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
 {
   if (lhs.high != rhs.high) {
     return lhs.high < rhs.high;
@@ -107,8 +107,8 @@ static bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR
 }
 
 template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
 {
   TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low);
   if (result.low < rhs.low) {
@@ -118,8 +118,8 @@ static TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>
 }
 
 template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
 {
   TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low);
   if (result.low > lhs.low) {
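The carry and borrow tests in these two operators rely on the wrap-around behaviour of unsigned arithmetic: in operator+, the low words overflowed exactly when `result.low < rhs.low`, and in operator-, a borrow occurred exactly when `result.low > lhs.low`. A self-contained sketch of the same trick on a hypothetical two-word type:

    #include <cassert>
    #include <cstdint>

    struct U128 { uint64_t high, low; };  // illustration only

    U128 add(const U128& a, const U128& b) {
      U128 r = {a.high + b.high, a.low + b.low};
      // Unsigned addition wraps modulo 2^64, so the sum is smaller than
      // an operand precisely when the low words carried out.
      if (r.low < b.low) r.high += 1;
      return r;
    }

    U128 sub(const U128& a, const U128& b) {
      U128 r = {a.high - b.high, a.low - b.low};
      // Symmetrically, the difference exceeds the minuend precisely
      // when the low words borrowed.
      if (r.low > a.low) r.high -= 1;
      return r;
    }

    int main() {
      U128 x = {0, ~0ull}, one = {0, 1};
      U128 y = add(x, one);            // carry propagates into high word
      assert(y.high == 1 && y.low == 0);
      U128 z = sub(y, one);            // borrow propagates back down
      assert(z.high == 0 && z.low == ~0ull);
      return 0;
    }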
@@ -130,8 +130,8 @@ static TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>
 
 
 template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
 {
   // Split each 128-bit integer into 4 32-bit integers, and then do the
   // multiplications by hand as follows:
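The comment describes the classic schoolbook scheme: each 128-bit operand is split into four 32-bit limbs, the limb products are formed with 64-bit multiplies (which cannot overflow, since 32x32 -> 64), and the partial products are summed with the appropriate shifts and carries. A reduced 64x64 -> 128 version of the same idea, assuming nothing beyond standard uint64_t:

    #include <cassert>
    #include <cstdint>

    // Schoolbook 64x64 -> 128 multiply via 32-bit limbs: the technique
    // the full 128-bit version applies with four limbs instead of two.
    void mul64to128(uint64_t a, uint64_t b, uint64_t& high, uint64_t& low) {
      const uint64_t a_lo = a & 0xFFFFFFFFull, a_hi = a >> 32;
      const uint64_t b_lo = b & 0xFFFFFFFFull, b_hi = b >> 32;

      // Four limb products; each fits in 64 bits because 32x32 -> 64.
      const uint64_t p0 = a_lo * b_lo;
      const uint64_t p1 = a_lo * b_hi;
      const uint64_t p2 = a_hi * b_lo;
      const uint64_t p3 = a_hi * b_hi;

      // Accumulate the middle column, tracking carries explicitly.
      const uint64_t mid = (p0 >> 32) + (p1 & 0xFFFFFFFFull) + (p2 & 0xFFFFFFFFull);
      low  = (p0 & 0xFFFFFFFFull) | (mid << 32);
      high = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
    }

    int main() {
      uint64_t hi, lo;
      mul64to128(0xFFFFFFFFFFFFFFFFull, 2, hi, lo);  // (2^64 - 1) * 2
      assert(hi == 1 && lo == 0xFFFFFFFFFFFFFFFEull);
      return 0;
    }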
@@ -205,8 +205,8 @@ static TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>
 }
 
 template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
 {
   if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) {
     return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
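The early return handles division by one without running the general long-division loop. `static_val<N>` encodes a word whose value is fixed at compile time, so `TensorUInt128<static_val<0>, static_val<1> >(1)` denotes the constant 1 and the comparison against it can fold away entirely. A rough sketch of the idea, with a simplified hypothetical StaticVal rather than Eigen's actual class:

    #include <cassert>
    #include <cstdint>

    // Simplified stand-in for a compile-time word that still behaves
    // like a runtime value when read.
    template <uint64_t N>
    struct StaticVal {
      explicit StaticVal(uint64_t v) { assert(v == N); }
      operator uint64_t() const { return N; }
    };

    int main() {
      StaticVal<1> one(1);           // the constant lives in the type
      assert(uint64_t(one) == 1);    // reads back as a plain word, so a
                                     // comparison against it is a
                                     // compile-time-constant operand
      return 0;
    }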