Fix some typos found

Commit afa616bc9e (parent 76bb29c0c2) · mirror of https://gitlab.com/libeigen/eigen.git
```diff
@@ -23,7 +23,7 @@ namespace internal {
   outside of which tanh(x) = +/-1 in single precision. The input is clamped
   to the range [-c, c]. The value c is chosen as the smallest value where
   the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004]
-  the approxmation tanh(x) ~= x is used for better accuracy as x tends to zero.
+  the approximation tanh(x) ~= x is used for better accuracy as x tends to zero.
 
   This implementation works on both scalars and packets.
  */
```
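For context, the comment above describes a common fast-tanh pattern: clamp the input where tanh saturates, and switch to the identity tanh(x) ~= x for tiny arguments. A minimal sketch of that pattern (the constants are illustrative placeholders, not Eigen's tuned values, and `std::tanh` stands in for the rational approximation):

```cpp
#include <algorithm>
#include <cmath>

// Sketch of the strategy in the comment above; not Eigen's actual code.
float tanh_clamped(float x) {
  const float kClamp = 7.90f;    // placeholder for the saturation bound c
  const float kTiny  = 0.0004f;  // below this, tanh(x) ~= x is more accurate
  if (std::abs(x) < kTiny) return x;           // small-argument identity
  x = std::min(std::max(x, -kClamp), kClamp);  // clamp to [-c, c]
  return std::tanh(x);  // stand-in for the polynomial/rational approximation
}
```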
```diff
@@ -31,7 +31,7 @@ namespace internal {
 * some (optional) processing of the outcome, e.g., division by n for mean.
 *
 * For the vectorized path let's observe that the packet-size and outer-unrolling
-* are both decided by the assignement logic. So all we have to do is to decide
+* are both decided by the assignment logic. So all we have to do is to decide
 * on the inner unrolling.
 *
 * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
```
```diff
@@ -596,7 +596,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
   return m_matrix += extendedTo(other.derived());
 }
 
-/** Substracts the vector \a other to each subvector of \c *this */
+/** Subtracts the vector \a other to each subvector of \c *this */
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
```
```diff
@@ -606,7 +606,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
   return m_matrix -= extendedTo(other.derived());
 }
 
-/** Multiples each subvector of \c *this by the vector \a other */
+/** Multiplies each subvector of \c *this by the vector \a other */
 template<typename OtherDerived>
 EIGEN_DEVICE_FUNC
 ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
```
```diff
@@ -2234,7 +2234,7 @@ EIGEN_STRONG_INLINE Packet16bf F32ToBf16(const Packet16f& a) {
 
 #if defined(EIGEN_VECTORIZE_AVX512BF16) && EIGEN_GNUC_AT_LEAST(10, 1)
 // Since GCC 10.1 supports avx512bf16 and C style explicit cast
-// (C++ static_cast is not supported yet), do converion via intrinsic
+// (C++ static_cast is not supported yet), do conversion via intrinsic
 // and register path for performance.
 r = (__m256i)(_mm512_cvtneps_pbh(a));
 
```
```diff
@@ -572,7 +572,7 @@ inline float trig_reduce_huge (float xf, int *quadrant)
   using Eigen::numext::uint64_t;
 
   const double pio2_62 = 3.4061215800865545e-19; // pi/2 * 2^-62
-  const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point foramt
+  const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point format
 
   // 192 bits of 2/pi for Payne-Hanek reduction
   // Bits are introduced by packet of 8 to enable aligned reads.
```
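A quick sanity check of the fixed-point comment: in a 2.62 format a 64-bit word w encodes the real number w * 2^-62, so 0.5 is exactly 2^61. A self-contained sketch (my illustration, not part of the patch):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  // 2.62 fixed point: value = bits * 2^-62, so 0.5 = 2^61 * 2^-62.
  const uint64_t zero_dot_five = uint64_t(1) << 61;
  std::printf("%g\n", std::ldexp(double(zero_dot_five), -62));  // prints 0.5
  return 0;
}
```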
```diff
@@ -3461,7 +3461,7 @@ EIGEN_ALWAYS_INLINE void zip_in_place<Packet4bf>(Packet4bf& p1, Packet4bf& p2) {
 
 EIGEN_STRONG_INLINE Packet4bf F32ToBf16(const Packet4f& p)
 {
-  // See the scalar implemention in BFloat16.h for a comprehensible explanation
+  // See the scalar implementation in BFloat16.h for a comprehensible explanation
   // of this fast rounding algorithm
   Packet4ui input = reinterpret_cast<Packet4ui>(p);
 
```
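The "fast rounding algorithm" referenced here is the usual round-to-nearest-even bias trick for float-to-bfloat16 conversion. A hedged standalone sketch of that trick (finite inputs only; the real BFloat16.h code also handles NaN):

```cpp
#include <cstdint>
#include <cstring>

// Round-to-nearest-even float -> bfloat16 via an integer bias; a sketch of
// the scheme the comment points to, not Eigen's exact code.
uint16_t f32_to_bf16_rtne(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));      // bit-cast float to uint32
  bits += 0x7FFFu + ((bits >> 16) & 1);      // add rounding bias; ties go to even
  return static_cast<uint16_t>(bits >> 16);  // keep the high 16 bits
}
```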
```diff
@@ -624,7 +624,7 @@
 #define EIGEN_CPLUSPLUS 0
 #endif
 
-// The macro EIGEN_COMP_CXXVER defines the c++ verson expected by the compiler.
+// The macro EIGEN_COMP_CXXVER defines the c++ version expected by the compiler.
 // For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER
 // is defined to 17.
 #if EIGEN_CPLUSPLUS > 201703L
```
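For illustration, collapsing the __cplusplus date values to a two-digit version follows a cascade like the one below. This is a sketch with a hypothetical CXXVER macro, not Eigen's actual logic from Macros.h:

```cpp
// Hypothetical sketch: map __cplusplus (e.g. 201703L) to 11/14/17/20.
#if __cplusplus > 201703L
  #define CXXVER 20
#elif __cplusplus >= 201703L
  #define CXXVER 17
#elif __cplusplus >= 201402L
  #define CXXVER 14
#elif __cplusplus >= 201103L
  #define CXXVER 11
#else
  #define CXXVER 3
#endif
```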
```diff
@@ -1,5 +1,5 @@
 #ifdef EIGEN_WARNINGS_DISABLED_2
-// "DisableStupidWarnings.h" was included twice recursively: Do not reenable warnings yet!
+// "DisableStupidWarnings.h" was included twice recursively: Do not re-enable warnings yet!
 # undef EIGEN_WARNINGS_DISABLED_2
 
 #elif defined(EIGEN_WARNINGS_DISABLED)
```
```diff
@@ -17,7 +17,7 @@
 #endif
 
 #if defined __NVCC__
-// Don't reenable the diagnostic messages, as it turns out these messages need
+// Don't re-enable the diagnostic messages, as it turns out these messages need
 // to be disabled at the point of the template instantiation (i.e the user code)
 // otherwise they'll be triggered by nvcc.
 // #pragma diag_default code_is_unreachable
```
```diff
@@ -20,7 +20,7 @@ The build stage consists of the following jobs:
 
 In principle every build-job has a corresponding test-job, however testing supported and unsupported modules is divided into separate jobs. The test jobs in detail:
 
-### Job dependecies
+### Job dependencies
 
 | Job Name | Arch | OS | Compiler | C++11 | Module
 |-----------------------------------------------------|-----------|----------------|------------|---------|--------
```
```diff
@@ -889,7 +889,7 @@ void packetmath_real() {
     data1[0] = std::numeric_limits<Scalar>::denorm_min();
     data1[1] = -std::numeric_limits<Scalar>::denorm_min();
     h.store(data2, internal::plog(h.load(data1)));
-    // TODO(rmlarsen): Reenable.
+    // TODO(rmlarsen): Re-enable.
     // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
     VERIFY((numext::isnan)(data2[1]));
   }
```
```diff
@@ -41,7 +41,7 @@ template<typename ArrayType> void vectorwiseop_array(const ArrayType& m)
   VERIFY_IS_APPROX(m2, m1.rowwise() + rowvec);
   VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec);
 
-  // test substraction
+  // test subtraction
   m2 = m1;
   m2.colwise() -= colvec;
   VERIFY_IS_APPROX(m2, m1.colwise() - colvec);
```
```diff
@@ -142,7 +142,7 @@ template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)
   VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec);
 
 
-  // test substraction
+  // test subtraction
   m2 = m1;
   m2.colwise() -= colvec;
   VERIFY_IS_APPROX(m2, m1.colwise() - colvec);
```
```diff
@@ -107,7 +107,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   typedef typename XprType::CoeffReturnType CoeffReturnType;
   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
   static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
-  protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout;
+  protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator won't be standard layout;
   bool isCopy, nByOne, oneByN;
   public:
   typedef StorageMemory<CoeffReturnType, Device> Storage;
```
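The access-control remark in this hunk reflects a C++ rule: a standard-layout class must declare all of its non-static data members with the same access control. A small illustration (my example, not from the patch):

```cpp
#include <type_traits>

struct SameAccess {
 protected:
  bool a;
  int b;  // all members protected: still standard layout
};

struct MixedAccess {
 protected:
  bool a;
 public:
  int b;  // members with mixed access control: no longer standard layout
};

static_assert(std::is_standard_layout<SameAccess>::value, "same access is fine");
static_assert(!std::is_standard_layout<MixedAccess>::value, "mixed access breaks it");
```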
```diff
@@ -112,7 +112,7 @@ struct TTPanelSize {
   // BC : determines if supporting bank conflict is required
   static EIGEN_CONSTEXPR bool BC = true;
   // DoubleBuffer: determines if double buffering technique should be used (This can be disabled by
-  // EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device doesnot have sufficient local memory)
+  // EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device does not have sufficient local memory)
   static EIGEN_CONSTEXPR bool DoubleBuffer =
 #ifdef EIGEN_SYCL_DISABLE_DOUBLE_BUFFER
       false;
```
```diff
@@ -430,7 +430,7 @@ struct ThreadProperties {
 Otherwise, the result of contraction will be written iin a temporary buffer. This is the case when Tall/Skinny
 contraction is used. So in this case, a final reduction step is required to compute final output.
 
-* \tparam contraction_tp: it is an enum value representing whether the local memroy/no local memory implementation of
+* \tparam contraction_tp: it is an enum value representing whether the local memory/no local memory implementation of
 the algorithm to be used
 *
 * \param scratch: local memory containing tiles of LHS and RHS tensors for each work-group
```
```diff
@@ -495,7 +495,7 @@ class TensorContractionKernel {
 * the TiledMemory for both local and private memory, the MemHolder structs is used as a helper to abstract out
 * different type of memory needed when local/no_local memory computation is called.
 *
-* \tparam contraction_type: it is an enum value representing whether the local memroy/no local memory implementation
+* \tparam contraction_type: it is an enum value representing whether the local memory/no local memory implementation
 of the algorithm to be used
 * \tparam the private memory size
 * \param ptr the tile memory pointer type
```
```diff
@@ -897,7 +897,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
       } else {
         // If we can't guarantee that all kernels in `k` slice will be
         // executed sequentially in current thread, it's no longer safe to use
-        // thread local memory in followig slices along the k dimensions.
+        // thread local memory in following slices along the k dimensions.
         eigen_assert(k > 0);
         can_use_thread_local_packed_[n].store(false,
                                               std::memory_order_relaxed);
```
```diff
@@ -715,7 +715,7 @@ class QueueInterface {
   EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; }
 
   EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
-    // OpenCL doesnot have such concept
+    // OpenCL does not have such a concept
     return 2;
   }
 
```
```diff
@@ -1035,7 +1035,7 @@ struct SyclDevice : public SyclDeviceBase {
     return queue_stream()->maxWorkItemSizes();
   }
   EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const {
-    // OpenCL doesnot have such concept
+    // OpenCL does not have such a concept
     return queue_stream()->maxSyclThreadsPerMultiProcessor();
   }
   EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const {
```
```diff
@@ -133,7 +133,7 @@ template <typename T> class UniformRandomGenerator {
     m_state = PCG_XSH_RS_state(seed);
 #ifdef EIGEN_USE_SYCL
     // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
-    // Therefor, we need two step to initializate the m_state.
+    // Therefore, we need two steps to initializate the m_state.
     // IN SYCL, the constructor of the functor is s called on the CPU
     // and we get the clock seed here from the CPU. However, This seed is
     //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
```
```diff
@@ -246,7 +246,7 @@ template <typename T> class NormalRandomGenerator {
     m_state = PCG_XSH_RS_state(seed);
 #ifdef EIGEN_USE_SYCL
     // In SYCL it is not possible to build PCG_XSH_RS_state in one step.
-    // Therefor, we need two steps to initializate the m_state.
+    // Therefore, we need two steps to initializate the m_state.
     // IN SYCL, the constructor of the functor is s called on the CPU
     // and we get the clock seed here from the CPU. However, This seed is
     //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function.
```
```diff
@@ -25,7 +25,7 @@
 * buffer is given as an input and all the threads within a work-group scan and
 * reduces the boundaries between the blocks (generated from the previous
 * kernel). and write the data on the temporary buffer. If the second kernel is
-* required, the third and final kerenl (ScanAdjustmentKernelFunctor) will
+* required, the third and final kernel (ScanAdjustmentKernelFunctor) will
 * adjust the final result into the output buffer.
 * The original algorithm for the parallel prefix sum can be found here:
 *
```
```diff
@@ -788,7 +788,7 @@ struct igammac_cf_impl {
     Scalar ax = main_igamma_term<Scalar>(a, x);
     // This is independent of mode. If this value is zero,
     // then the function value is zero. If the function value is zero,
-    // then we are in a neighborhood where the function value evalutes to zero,
+    // then we are in a neighborhood where the function value evaluates to zero,
     // so the derivative is zero.
     if (ax == zero) {
       return zero;
```
```diff
@@ -899,7 +899,7 @@ struct igamma_series_impl {
 
     // This is independent of mode. If this value is zero,
     // then the function value is zero. If the function value is zero,
-    // then we are in a neighborhood where the function value evalutes to zero,
+    // then we are in a neighborhood where the function value evaluates to zero,
     // so the derivative is zero.
     if (ax == zero) {
       return zero;
```
```diff
@@ -38,24 +38,24 @@ template <typename T> T cwiseMin(T x, T y) { return cl::sycl::min(x, y); }
 }
 }
 
-struct EqualAssignement {
+struct EqualAssignment {
   template <typename Lhs, typename Rhs>
   void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; }
 };
 
-struct PlusEqualAssignement {
+struct PlusEqualAssignment {
   template <typename Lhs, typename Rhs>
   void operator()(Lhs& lhs, const Rhs& rhs) { lhs += rhs; }
 };
 
 template <typename DataType, int DataLayout,
-          typename Assignement, typename Operator>
+          typename Assignment, typename Operator>
 void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
                                     const array<int64_t, 3>& tensor_range) {
   Operator op;
-  Assignement asgn;
+  Assignment asgn;
   {
-    /* Assignement(out, Operator(in)) */
+    /* Assignment(out, Operator(in)) */
     Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
     Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
     in = in.random() + DataType(0.01);
```
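The two renamed functors simply abstract whether a test writes its result with `=` or `+=`. A minimal usage sketch of the renamed structs (my example, not from the test file):

```cpp
#include <cassert>

struct EqualAssignment {
  template <typename Lhs, typename Rhs>
  void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; }
};

struct PlusEqualAssignment {
  template <typename Lhs, typename Rhs>
  void operator()(Lhs& lhs, const Rhs& rhs) { lhs += rhs; }
};

int main() {
  float out = 1.0f;
  EqualAssignment{}(out, 2.0f);      // out = 2
  PlusEqualAssignment{}(out, 3.0f);  // out += 3 -> 5
  assert(out == 5.0f);
  return 0;
}
```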
```diff
@@ -84,7 +84,7 @@ void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
     sycl_device.deallocate(gpu_data_out);
   }
   {
-    /* Assignement(out, Operator(out)) */
+    /* Assignment(out, Operator(out)) */
     Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
     out = out.random() + DataType(0.01);
     Tensor<DataType, 3, DataLayout, int64_t> reference(out);
```
```diff
@@ -137,11 +137,11 @@ DECLARE_UNARY_STRUCT(isnan)
 DECLARE_UNARY_STRUCT(isfinite)
 DECLARE_UNARY_STRUCT(isinf)
 
-template <typename DataType, int DataLayout, typename Assignement>
+template <typename DataType, int DataLayout, typename Assignment>
 void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device,
                                          const array<int64_t, 3>& tensor_range) {
 #define RUN_UNARY_TEST(FUNC) \
-  test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \
+  test_unary_builtins_for_scalar<DataType, DataLayout, Assignment, \
                                  op_##FUNC>(sycl_device, tensor_range)
   RUN_UNARY_TEST(abs);
   RUN_UNARY_TEST(sqrt);
```
```diff
@@ -190,9 +190,9 @@ template <typename DataType, int DataLayout>
 void test_unary_builtins(const Eigen::SyclDevice& sycl_device,
                          const array<int64_t, 3>& tensor_range) {
   test_unary_builtins_for_assignement<DataType, DataLayout,
-                                      PlusEqualAssignement>(sycl_device, tensor_range);
+                                      PlusEqualAssignment>(sycl_device, tensor_range);
   test_unary_builtins_for_assignement<DataType, DataLayout,
-                                      EqualAssignement>(sycl_device, tensor_range);
+                                      EqualAssignment>(sycl_device, tensor_range);
   test_unary_builtins_return_bool<DataType, DataLayout,
                                   op_isnan>(sycl_device, tensor_range);
   test_unary_builtins_return_bool<DataType, DataLayout,
```