Improved the portability of the tensor code

Benoit Steiner 2016-05-11 23:29:09 -07:00
parent fae0493f98
commit 09653e1f82
3 changed files with 25 additions and 25 deletions
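
The two recurring changes below are (1) moving the static storage-class specifier ahead of the EIGEN_DEVICE_FUNC / EIGEN_*_INLINE attribute macros, since once those macros expand, some compilers warn about or reject a static that does not lead the declaration, and (2) parenthesizing the conditions passed to EIGEN_STATIC_ASSERT. A minimal before/after sketch of the first pattern, taken from the hunks below:

// Before: 'static' trails the attribute macros; after macro expansion
// some compilers complain that the storage class is not at the
// beginning of the declaration.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int AddCost();

// After: 'static' leads the declaration, which is accepted everywhere.
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost();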

unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h

@@ -32,25 +32,25 @@ class TensorOpCost {
// model based on minimal reciprocal throughput numbers from Intel or
// Agner Fog's tables would be better than what is there now.
template <typename ArgType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int MulCost() {
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() {
return internal::functor_traits<
internal::scalar_product_op<ArgType, ArgType> >::Cost;
}
template <typename ArgType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int AddCost() {
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() {
return internal::functor_traits<internal::scalar_sum_op<ArgType> >::Cost;
}
template <typename ArgType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int DivCost() {
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() {
return internal::functor_traits<
internal::scalar_quotient_op<ArgType, ArgType> >::Cost;
}
template <typename ArgType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int ModCost() {
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() {
return internal::functor_traits<internal::scalar_mod_op<ArgType> >::Cost;
}
template <typename SrcType, typename TargetType>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static int CastCost() {
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() {
return internal::functor_traits<
internal::scalar_cast_op<SrcType, TargetType> >::Cost;
}
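
For context, these helpers feed the tensor expression cost model. A minimal sketch of a typical call pattern, assuming float coefficients; the axpyCost function is hypothetical, not code from this commit:

// Hypothetical: cost of one multiply and one add per output
// coefficient, with two scalar loads and one store, expressed via
// TensorOpCost's (bytes_loaded, bytes_stored, compute_cycles)
// constructor.
TensorOpCost axpyCost() {
  const int compute = TensorOpCost::MulCost<float>() +
                      TensorOpCost::AddCost<float>();
  return TensorOpCost(2 * sizeof(float), sizeof(float), compute);
}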

unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h

@@ -122,7 +122,7 @@ struct preserve_inner_most_dims<ReducedDims, NumTensorDims, RowMajor>{
template <int DimIndex, typename Self, typename Op>
struct GenericDimReducer {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) {
-EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
GenericDimReducer<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
@@ -183,7 +183,7 @@ struct InnerMostDimPreserver {
template <int DimIndex, typename Self, typename Op>
struct InnerMostDimPreserver<DimIndex, Self, Op, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) {
-EIGEN_STATIC_ASSERT(DimIndex > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) {
const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex];
InnerMostDimPreserver<DimIndex-1, Self, Op>::reduce(self, input, reducer, accum);
@@ -404,7 +404,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
{
-EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE);
+EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)),
YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -566,7 +566,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
-EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
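
A note on the extra parentheses introduced above: EIGEN_STATIC_ASSERT is a macro, so a condition containing a top-level comma (say, inside a template argument list) would be split into separate macro arguments, and if the macro body places the condition inside a template argument list itself, a bare > can close that list early. Parenthesizing every condition uniformly sidesteps both hazards. A sketch using a hypothetical two-argument assert macro, not Eigen's exact definition:

#include <type_traits>

// Hypothetical macro in the spirit of EIGEN_STATIC_ASSERT(COND, MSG).
#define MY_STATIC_ASSERT(COND, MSG) static_assert(COND, #MSG)

// Fine: no top-level comma in the condition.
MY_STATIC_ASSERT(sizeof(long) >= 4, LONG_TOO_SMALL);

// Would not compile: the comma inside the template argument list
// splits the condition into two macro arguments.
//   MY_STATIC_ASSERT(std::is_same<int, long>::value, TYPES_DIFFER);

// Parenthesized, the condition is a single macro argument again.
MY_STATIC_ASSERT((!std::is_same<int, long>::value), TYPES_DIFFER);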

unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h

@@ -73,22 +73,22 @@ struct TensorUInt128
template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
return (lhs.high == rhs.high) & (lhs.low == rhs.low);
}
template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
return (lhs.high != rhs.high) | (lhs.low != rhs.low);
}
template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
if (lhs.high != rhs.high) {
return lhs.high > rhs.high;
@@ -97,8 +97,8 @@ static bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
}
template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
if (lhs.high != rhs.high) {
return lhs.high < rhs.high;
@@ -107,8 +107,8 @@ static bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
}
template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low);
if (result.low < rhs.low) {
@@ -118,8 +118,8 @@ static TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
}
template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low);
if (result.low > lhs.low) {
@@ -130,8 +130,8 @@ static TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
// Split each 128-bit integer into 4 32-bit integers, and then do the
// multiplications by hand as follows:
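
The scheme that comment describes reduces the 128-bit product to 32-bit limb multiplies so that no native 128-bit multiply is needed. A sketch of the 64x64 -> 128 building block such code rests on; names are illustrative, not the implementation's:

#include <cstdint>

// Multiply two 64-bit values into a 128-bit (high, low) pair using
// only 32-bit limb products.
static void mul64to128(uint64_t a, uint64_t b, uint64_t* hi, uint64_t* lo) {
  const uint64_t a_lo = a & 0xffffffffu, a_hi = a >> 32;
  const uint64_t b_lo = b & 0xffffffffu, b_hi = b >> 32;
  const uint64_t p0 = a_lo * b_lo;  // bits   0..63
  const uint64_t p1 = a_lo * b_hi;  // bits  32..95
  const uint64_t p2 = a_hi * b_lo;  // bits  32..95
  const uint64_t p3 = a_hi * b_hi;  // bits  64..127
  // Accumulate the middle partial products, keeping the carry that
  // crosses bit 63.
  const uint64_t mid = (p0 >> 32) + (p1 & 0xffffffffu) + (p2 & 0xffffffffu);
  *lo = (mid << 32) | (p0 & 0xffffffffu);
  *hi = p3 + (p1 >> 32) + (p2 >> 32) + (mid >> 32);
}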
@@ -205,8 +205,8 @@ static TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
}
template <typename HL, typename LL, typename HR, typename LR>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-static TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
+static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
{
if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) {
return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
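
A small usage sketch of the resulting free-function operators, assuming the usual Eigen::internal namespace; the two-argument (high, low) constructor is the one already used in the hunks above:

typedef Eigen::internal::TensorUInt128<uint64_t, uint64_t> UInt128;

UInt128 a(1, 0);     // high = 1, low = 0, i.e. 2^64
UInt128 b(0, 3);
UInt128 q = a / b;   // operator/ above: floor(2^64 / 3)
UInt128 s = a - b;   // operator- above, with borrow handling
bool ge = (a >= b);  // comparison operators above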