diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index ef7841796..2c9bbb57c 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -23,7 +23,7 @@ namespace internal { outside of which tanh(x) = +/-1 in single precision. The input is clamped to the range [-c, c]. The value c is chosen as the smallest value where the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004] - the approxmation tanh(x) ~= x is used for better accuracy as x tends to zero. + the approximation tanh(x) ~= x is used for better accuracy as x tends to zero. This implementation works on both scalars and packets. */ diff --git a/Eigen/src/Core/PartialReduxEvaluator.h b/Eigen/src/Core/PartialReduxEvaluator.h index 4f0f08a81..4040ae726 100644 --- a/Eigen/src/Core/PartialReduxEvaluator.h +++ b/Eigen/src/Core/PartialReduxEvaluator.h @@ -31,7 +31,7 @@ namespace internal { * some (optional) processing of the outcome, e.g., division by n for mean. * * For the vectorized path let's observe that the packet-size and outer-unrolling -* are both decided by the assignement logic. So all we have to do is to decide +* are both decided by the assignment logic. So all we have to do is to decide * on the inner unrolling. 
* * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h, diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 12d6507d1..3c3cc451e 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -596,7 +596,7 @@ template class VectorwiseOp return m_matrix += extendedTo(other.derived()); } - /** Substracts the vector \a other to each subvector of \c *this */ + /** Subtracts the vector \a other from each subvector of \c *this */ template EIGEN_DEVICE_FUNC ExpressionType& operator-=(const DenseBase& other) @@ -606,7 +606,7 @@ template class VectorwiseOp return m_matrix -= extendedTo(other.derived()); } - /** Multiples each subvector of \c *this by the vector \a other */ + /** Multiplies each subvector of \c *this by the vector \a other */ template EIGEN_DEVICE_FUNC ExpressionType& operator*=(const DenseBase& other) diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index bb3fce0f1..4f857269e 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -2234,7 +2234,7 @@ EIGEN_STRONG_INLINE Packet16bf F32ToBf16(const Packet16f& a) { #if defined(EIGEN_VECTORIZE_AVX512BF16) && EIGEN_GNUC_AT_LEAST(10, 1) // Since GCC 10.1 supports avx512bf16 and C style explicit cast - // (C++ static_cast is not supported yet), do converion via intrinsic + // (C++ static_cast is not supported yet), do conversion via intrinsic // and register path for performance. 
r = (__m256i)(_mm512_cvtneps_pbh(a)); diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 578c98699..071acf01b 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -572,7 +572,7 @@ inline float trig_reduce_huge (float xf, int *quadrant) using Eigen::numext::uint64_t; const double pio2_62 = 3.4061215800865545e-19; // pi/2 * 2^-62 - const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point foramt + const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point format // 192 bits of 2/pi for Payne-Hanek reduction // Bits are introduced by packet of 8 to enable aligned reads. diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index d96ee219b..382a2c8f9 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -3461,7 +3461,7 @@ EIGEN_ALWAYS_INLINE void zip_in_place(Packet4bf& p1, Packet4bf& p2) { EIGEN_STRONG_INLINE Packet4bf F32ToBf16(const Packet4f& p) { - // See the scalar implemention in BFloat16.h for a comprehensible explanation + // See the scalar implementation in BFloat16.h for a comprehensible explanation // of this fast rounding algorithm Packet4ui input = reinterpret_cast(p); diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index a0620b383..dc1368a0a 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -624,7 +624,7 @@ #define EIGEN_CPLUSPLUS 0 #endif -// The macro EIGEN_COMP_CXXVER defines the c++ verson expected by the compiler. +// The macro EIGEN_COMP_CXXVER defines the c++ version expected by the compiler. // For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER // is defined to 17. 
#if EIGEN_CPLUSPLUS > 201703L diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h index 1ce6fd1b0..9dad396c9 100644 --- a/Eigen/src/Core/util/ReenableStupidWarnings.h +++ b/Eigen/src/Core/util/ReenableStupidWarnings.h @@ -1,5 +1,5 @@ #ifdef EIGEN_WARNINGS_DISABLED_2 -// "DisableStupidWarnings.h" was included twice recursively: Do not reenable warnings yet! +// "DisableStupidWarnings.h" was included twice recursively: Do not re-enable warnings yet! # undef EIGEN_WARNINGS_DISABLED_2 #elif defined(EIGEN_WARNINGS_DISABLED) @@ -17,7 +17,7 @@ #endif #if defined __NVCC__ -// Don't reenable the diagnostic messages, as it turns out these messages need +// Don't re-enable the diagnostic messages, as it turns out these messages need // to be disabled at the point of the template instantiation (i.e the user code) // otherwise they'll be triggered by nvcc. // #pragma diag_default code_is_unreachable diff --git a/ci/README.md b/ci/README.md index a5599cc4b..6a63eefb4 100644 --- a/ci/README.md +++ b/ci/README.md @@ -20,7 +20,7 @@ The build stage consists of the following jobs: In principle every build-job has a corresponding test-job, however testing supported and unsupported modules is divided into separate jobs. The test jobs in detail: -### Job dependecies +### Job dependencies | Job Name | Arch | OS | Compiler | C++11 | Module |-----------------------------------------------------|-----------|----------------|------------|---------|-------- diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 121ec7283..61504815f 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -889,7 +889,7 @@ void packetmath_real() { data1[0] = std::numeric_limits::denorm_min(); data1[1] = -std::numeric_limits::denorm_min(); h.store(data2, internal::plog(h.load(data1))); - // TODO(rmlarsen): Reenable. + // TODO(rmlarsen): Re-enable. 
// VERIFY_IS_EQUAL(std::log(std::numeric_limits::denorm_min()), data2[0]); VERIFY((numext::isnan)(data2[1])); } diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index 261e80c06..4369789cd 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp @@ -41,7 +41,7 @@ template void vectorwiseop_array(const ArrayType& m) VERIFY_IS_APPROX(m2, m1.rowwise() + rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec); - // test substraction + // test subtraction m2 = m1; m2.colwise() -= colvec; VERIFY_IS_APPROX(m2, m1.colwise() - colvec); @@ -142,7 +142,7 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec); - // test substraction + // test subtraction m2 = m1; m2.colwise() -= colvec; VERIFY_IS_APPROX(m2, m1.colwise() - colvec); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 64f5bc2e2..8a7f5eb01 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -107,7 +107,7 @@ struct TensorEvaluator, Device> typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType::type PacketReturnType; static const int PacketSize = PacketType::size; - protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator wont be standard layout; + protected: // all the non-static fields must have the same access control, otherwise the TensorEvaluator won't be standard layout; bool isCopy, nByOne, oneByN; public: typedef StorageMemory Storage; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h index 9f744ce4a..dd116748f 100755 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionSycl.h @@ -112,7 +112,7 @@ struct TTPanelSize { // BC : determines if supporting 
bank conflict is required static EIGEN_CONSTEXPR bool BC = true; // DoubleBuffer: determines if double buffering technique should be used (This can be disabled by - // EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device doesnot have sufficient local memory) + // EIGEN_SYCL_DISABLE_DOUBLE_BUFFER macro when the device does not have sufficient local memory) static EIGEN_CONSTEXPR bool DoubleBuffer = #ifdef EIGEN_SYCL_DISABLE_DOUBLE_BUFFER false; @@ -430,7 +430,7 @@ struct ThreadProperties { Otherwise, the result of contraction will be written iin a temporary buffer. This is the case when Tall/Skinny contraction is used. So in this case, a final reduction step is required to compute final output. - * \tparam contraction_tp: it is an enum value representing whether the local memroy/no local memory implementation of + * \tparam contraction_tp: it is an enum value representing whether the local memory/no local memory implementation of the algorithm to be used * * \param scratch: local memory containing tiles of LHS and RHS tensors for each work-group @@ -495,7 +495,7 @@ class TensorContractionKernel { * the TiledMemory for both local and private memory, the MemHolder structs is used as a helper to abstract out * different type of memory needed when local/no_local memory computation is called. 
* - * \tparam contraction_type: it is an enum value representing whether the local memroy/no local memory implementation + * \tparam contraction_type: it is an enum value representing whether the local memory/no local memory implementation of the algorithm to be used * \tparam the private memory size * \param ptr the tile memory pointer type diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h index 2452e1848..0bbc1e8fb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -897,7 +897,7 @@ struct TensorEvaluator 0); can_use_thread_local_packed_[n].store(false, std::memory_order_relaxed); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index 9cd8c6de1..454944e25 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -715,7 +715,7 @@ class QueueInterface { EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const { - // OpenCL doesnot have such concept + // OpenCL does not have such a concept return 2; } @@ -1035,7 +1035,7 @@ struct SyclDevice : public SyclDeviceBase { return queue_stream()->maxWorkItemSizes(); } EIGEN_STRONG_INLINE unsigned long maxSyclThreadsPerMultiProcessor() const { - // OpenCL doesnot have such concept + // OpenCL does not have such a concept return queue_stream()->maxSyclThreadsPerMultiProcessor(); } EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h index c5a83d259..748ba0a18 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -133,7 +133,7 @@ 
template class UniformRandomGenerator { m_state = PCG_XSH_RS_state(seed); #ifdef EIGEN_USE_SYCL // In SYCL it is not possible to build PCG_XSH_RS_state in one step. - // Therefor, we need two step to initializate the m_state. + // Therefore, we need two steps to initialize the m_state. // IN SYCL, the constructor of the functor is s called on the CPU // and we get the clock seed here from the CPU. However, This seed is //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function. @@ -246,7 +246,7 @@ template class NormalRandomGenerator { m_state = PCG_XSH_RS_state(seed); #ifdef EIGEN_USE_SYCL // In SYCL it is not possible to build PCG_XSH_RS_state in one step. - // Therefor, we need two steps to initializate the m_state. + // Therefore, we need two steps to initialize the m_state. // IN SYCL, the constructor of the functor is s called on the CPU // and we get the clock seed here from the CPU. However, This seed is //the same for all the thread. As unlike CUDA, the thread.ID, BlockID, etc is not a global function. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h index ec7790018..5b4942b76 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScanSycl.h @@ -25,7 +25,7 @@ * buffer is given as an input and all the threads within a work-group scan and * reduces the boundaries between the blocks (generated from the previous * kernel). and write the data on the temporary buffer. If the second kernel is - * required, the third and final kerenl (ScanAdjustmentKernelFunctor) will + * required, the third and final kernel (ScanAdjustmentKernelFunctor) will * adjust the final result into the output buffer. 
* The original algorithm for the parallel prefix sum can be found here: * diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 2a012eb8f..e6180420e 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -788,7 +788,7 @@ struct igammac_cf_impl { Scalar ax = main_igamma_term(a, x); // This is independent of mode. If this value is zero, // then the function value is zero. If the function value is zero, - // then we are in a neighborhood where the function value evalutes to zero, + // then we are in a neighborhood where the function value evaluates to zero, // so the derivative is zero. if (ax == zero) { return zero; @@ -899,7 +899,7 @@ struct igamma_series_impl { // This is independent of mode. If this value is zero, // then the function value is zero. If the function value is zero, - // then we are in a neighborhood where the function value evalutes to zero, + // then we are in a neighborhood where the function value evaluates to zero, // so the derivative is zero. 
if (ax == zero) { return zero; diff --git a/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/unsupported/test/cxx11_tensor_builtins_sycl.cpp index 72cb62fd5..df142fe8a 100644 --- a/unsupported/test/cxx11_tensor_builtins_sycl.cpp +++ b/unsupported/test/cxx11_tensor_builtins_sycl.cpp @@ -38,24 +38,24 @@ template T cwiseMin(T x, T y) { return cl::sycl::min(x, y); } } } -struct EqualAssignement { +struct EqualAssignment { template void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; } }; -struct PlusEqualAssignement { +struct PlusEqualAssignment { template void operator()(Lhs& lhs, const Rhs& rhs) { lhs += rhs; } }; template + typename Assignment, typename Operator> void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device, const array& tensor_range) { Operator op; - Assignement asgn; + Assignment asgn; { - /* Assignement(out, Operator(in)) */ + /* Assignment(out, Operator(in)) */ Tensor in(tensor_range); Tensor out(tensor_range); in = in.random() + DataType(0.01); @@ -84,7 +84,7 @@ void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device, sycl_device.deallocate(gpu_data_out); } { - /* Assignement(out, Operator(out)) */ + /* Assignment(out, Operator(out)) */ Tensor out(tensor_range); out = out.random() + DataType(0.01); Tensor reference(out); @@ -137,11 +137,11 @@ DECLARE_UNARY_STRUCT(isnan) DECLARE_UNARY_STRUCT(isfinite) DECLARE_UNARY_STRUCT(isinf) -template +template void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device, const array& tensor_range) { #define RUN_UNARY_TEST(FUNC) \ - test_unary_builtins_for_scalar(sycl_device, tensor_range) RUN_UNARY_TEST(abs); RUN_UNARY_TEST(sqrt); @@ -190,9 +190,9 @@ template void test_unary_builtins(const Eigen::SyclDevice& sycl_device, const array& tensor_range) { test_unary_builtins_for_assignement(sycl_device, tensor_range); + PlusEqualAssignment>(sycl_device, tensor_range); test_unary_builtins_for_assignement(sycl_device, tensor_range); + EqualAssignment>(sycl_device, 
tensor_range); test_unary_builtins_return_bool(sycl_device, tensor_range); test_unary_builtins_return_bool