mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-01 08:14:10 +08:00
Merge.
This commit is contained in:
commit
1f70bd4134
@ -62,7 +62,7 @@ struct default_packet_traits
|
|||||||
HasRsqrt = 0,
|
HasRsqrt = 0,
|
||||||
HasExp = 0,
|
HasExp = 0,
|
||||||
HasLog = 0,
|
HasLog = 0,
|
||||||
HasLog10 = 0,
|
HasLog10 = 0,
|
||||||
HasPow = 0,
|
HasPow = 0,
|
||||||
|
|
||||||
HasSin = 0,
|
HasSin = 0,
|
||||||
@ -71,9 +71,9 @@ struct default_packet_traits
|
|||||||
HasASin = 0,
|
HasASin = 0,
|
||||||
HasACos = 0,
|
HasACos = 0,
|
||||||
HasATan = 0,
|
HasATan = 0,
|
||||||
HasSinh = 0,
|
HasSinh = 0,
|
||||||
HasCosh = 0,
|
HasCosh = 0,
|
||||||
HasTanh = 0,
|
HasTanh = 0,
|
||||||
HasLGamma = 0,
|
HasLGamma = 0,
|
||||||
HasDiGamma = 0,
|
HasDiGamma = 0,
|
||||||
HasZeta = 0,
|
HasZeta = 0,
|
||||||
|
@ -705,12 +705,12 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
|
|||||||
isfinite_impl(const T& x)
|
isfinite_impl(const T& x)
|
||||||
{
|
{
|
||||||
#ifdef __CUDA_ARCH__
|
#ifdef __CUDA_ARCH__
|
||||||
return (isfinite)(x);
|
return (::isfinite)(x);
|
||||||
#elif EIGEN_USE_STD_FPCLASSIFY
|
#elif EIGEN_USE_STD_FPCLASSIFY
|
||||||
using std::isfinite;
|
using std::isfinite;
|
||||||
return isfinite EIGEN_NOT_A_MACRO (x);
|
return isfinite EIGEN_NOT_A_MACRO (x);
|
||||||
#else
|
#else
|
||||||
return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
|
return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -720,7 +720,7 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
|
|||||||
isinf_impl(const T& x)
|
isinf_impl(const T& x)
|
||||||
{
|
{
|
||||||
#ifdef __CUDA_ARCH__
|
#ifdef __CUDA_ARCH__
|
||||||
return (isinf)(x);
|
return (::isinf)(x);
|
||||||
#elif EIGEN_USE_STD_FPCLASSIFY
|
#elif EIGEN_USE_STD_FPCLASSIFY
|
||||||
using std::isinf;
|
using std::isinf;
|
||||||
return isinf EIGEN_NOT_A_MACRO (x);
|
return isinf EIGEN_NOT_A_MACRO (x);
|
||||||
@ -735,7 +735,7 @@ typename internal::enable_if<(!internal::is_integral<T>::value)&&(!NumTraits<T>:
|
|||||||
isnan_impl(const T& x)
|
isnan_impl(const T& x)
|
||||||
{
|
{
|
||||||
#ifdef __CUDA_ARCH__
|
#ifdef __CUDA_ARCH__
|
||||||
return (isnan)(x);
|
return (::isnan)(x);
|
||||||
#elif EIGEN_USE_STD_FPCLASSIFY
|
#elif EIGEN_USE_STD_FPCLASSIFY
|
||||||
using std::isnan;
|
using std::isnan;
|
||||||
return isnan EIGEN_NOT_A_MACRO (x);
|
return isnan EIGEN_NOT_A_MACRO (x);
|
||||||
|
@ -406,6 +406,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::ha
|
|||||||
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) {
|
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) {
|
||||||
return Eigen::half(::sqrtf(float(a)));
|
return Eigen::half(::sqrtf(float(a)));
|
||||||
}
|
}
|
||||||
|
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) {
|
||||||
|
return Eigen::half(::powf(float(a), float(b)));
|
||||||
|
}
|
||||||
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) {
|
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) {
|
||||||
return Eigen::half(::floorf(float(a)));
|
return Eigen::half(::floorf(float(a)));
|
||||||
}
|
}
|
||||||
@ -432,6 +435,9 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half&
|
|||||||
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
|
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) {
|
||||||
return Eigen::half(::sqrtf(float(a)));
|
return Eigen::half(::sqrtf(float(a)));
|
||||||
}
|
}
|
||||||
|
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) {
|
||||||
|
return Eigen::half(::powf(float(a), float(b)));
|
||||||
|
}
|
||||||
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
|
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) {
|
||||||
return Eigen::half(::floorf(float(a)));
|
return Eigen::half(::floorf(float(a)));
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,8 @@
|
|||||||
// we'll use on the host side (SSE, AVX, ...)
|
// we'll use on the host side (SSE, AVX, ...)
|
||||||
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
// Most of the following operations require arch >= 5.3
|
||||||
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
@ -33,14 +34,7 @@ template<> struct packet_traits<half> : default_packet_traits
|
|||||||
AlignedOnScalar = 1,
|
AlignedOnScalar = 1,
|
||||||
size=2,
|
size=2,
|
||||||
HasHalfPacket = 0,
|
HasHalfPacket = 0,
|
||||||
|
HasDiv = 1
|
||||||
HasDiv = 1,
|
|
||||||
HasLog = 1,
|
|
||||||
HasExp = 1,
|
|
||||||
HasSqrt = 1,
|
|
||||||
HasRsqrt = 1,
|
|
||||||
|
|
||||||
HasBlend = 0,
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -74,20 +68,12 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<half>(half* to, co
|
|||||||
|
|
||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) {
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Aligned>(const half* from) {
|
||||||
#if __CUDA_ARCH__ >= 320
|
|
||||||
return __ldg((const half2*)from);
|
return __ldg((const half2*)from);
|
||||||
#else
|
|
||||||
return __halves2half2(*(from+0), *(from+1));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) {
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const half* from) {
|
||||||
#if __CUDA_ARCH__ >= 320
|
|
||||||
return __halves2half2(__ldg(from+0), __ldg(from+1));
|
return __halves2half2(__ldg(from+0), __ldg(from+1));
|
||||||
#else
|
|
||||||
return __halves2half2(*(from+0), *(from+1));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) {
|
template<> EIGEN_DEVICE_FUNC inline half2 pgather<half, half2>(const half* from, Index stride) {
|
||||||
@ -120,8 +106,6 @@ ptranspose(PacketBlock<half2,2>& kernel) {
|
|||||||
kernel.packet[1] = __halves2half2(a2, b2);
|
kernel.packet[1] = __halves2half2(a2, b2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The following operations require arch >= 5.3
|
|
||||||
#if __CUDA_ARCH__ >= 530
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset<half2>(const half& a) {
|
||||||
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
|
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
|
||||||
}
|
}
|
||||||
@ -197,7 +181,6 @@ template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
|
|||||||
template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
|
template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
|
||||||
return __hmul(__low2half(a), __high2half(a));
|
return __hmul(__low2half(a), __high2half(a));
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
|
@ -71,6 +71,7 @@ struct functor_traits<scalar_cast_op<half, float> >
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct type_casting_traits<half, float> {
|
struct type_casting_traits<half, float> {
|
||||||
@ -82,22 +83,9 @@ struct type_casting_traits<half, float> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
||||||
float2 r1 = __half22float2(a);
|
float2 r1 = __half22float2(a);
|
||||||
float2 r2 = __half22float2(b);
|
float2 r2 = __half22float2(b);
|
||||||
return make_float4(r1.x, r1.y, r2.x, r2.y);
|
return make_float4(r1.x, r1.y, r2.x, r2.y);
|
||||||
#else
|
|
||||||
half r1;
|
|
||||||
r1.x = a.x & 0xFFFF;
|
|
||||||
half r2;
|
|
||||||
r2.x = (a.x & 0xFFFF0000) >> 16;
|
|
||||||
half r3;
|
|
||||||
r3.x = b.x & 0xFFFF;
|
|
||||||
half r4;
|
|
||||||
r4.x = (b.x & 0xFFFF0000) >> 16;
|
|
||||||
return make_float4(static_cast<float>(r1), static_cast<float>(r2),
|
|
||||||
static_cast<float>(r3), static_cast<float>(r4));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -111,19 +99,10 @@ struct type_casting_traits<float, half> {
|
|||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
|
template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
|
||||||
// Simply discard the second half of the input
|
// Simply discard the second half of the input
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
|
|
||||||
return __float22half2_rn(make_float2(a.x, a.y));
|
return __float22half2_rn(make_float2(a.x, a.y));
|
||||||
#else
|
|
||||||
half r1 = static_cast<half>(a.x);
|
|
||||||
half r2 = static_cast<half>(a.y);
|
|
||||||
half2 r;
|
|
||||||
r.x = 0;
|
|
||||||
r.x |= r1.x;
|
|
||||||
r.x |= (static_cast<unsigned int>(r2.x) << 16);
|
|
||||||
return r;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
@ -19,10 +19,25 @@ macro(ei_add_test_internal testname testname_with_suffix)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu)
|
if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu)
|
||||||
if (${ARGC} GREATER 2)
|
if(EIGEN_TEST_CUDA_CLANG)
|
||||||
cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
|
set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX)
|
||||||
|
if(CUDA_64_BIT_DEVICE_CODE)
|
||||||
|
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
|
||||||
|
else()
|
||||||
|
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib")
|
||||||
|
endif()
|
||||||
|
if (${ARGC} GREATER 2)
|
||||||
|
add_executable(${targetname} ${filename})
|
||||||
|
else()
|
||||||
|
add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
|
||||||
|
endif()
|
||||||
|
target_link_libraries(${targetname} "cudart_static" "cuda" "dl" "rt" "pthread")
|
||||||
else()
|
else()
|
||||||
cuda_add_executable(${targetname} ${filename})
|
if (${ARGC} GREATER 2)
|
||||||
|
cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2})
|
||||||
|
else()
|
||||||
|
cuda_add_executable(${targetname} ${filename})
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
add_executable(${targetname} ${filename})
|
add_executable(${targetname} ${filename})
|
||||||
@ -316,7 +331,11 @@ macro(ei_testing_print_summary)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(EIGEN_TEST_CUDA)
|
if(EIGEN_TEST_CUDA)
|
||||||
message(STATUS "CUDA: ON")
|
if(EIGEN_TEST_CUDA_CLANG)
|
||||||
|
message(STATUS "CUDA: ON (using clang)")
|
||||||
|
else()
|
||||||
|
message(STATUS "CUDA: ON (using nvcc)")
|
||||||
|
endif()
|
||||||
else()
|
else()
|
||||||
message(STATUS "CUDA: OFF")
|
message(STATUS "CUDA: OFF")
|
||||||
endif()
|
endif()
|
||||||
|
@ -327,8 +327,14 @@ endif()
|
|||||||
|
|
||||||
# CUDA unit tests
|
# CUDA unit tests
|
||||||
option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF)
|
option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF)
|
||||||
|
option(EIGEN_TEST_CUDA_CLANG "Use clang instead of nvcc to compile the CUDA tests" OFF)
|
||||||
|
|
||||||
|
if(EIGEN_TEST_CUDA_CLANG AND NOT CMAKE_CXX_COMPILER MATCHES "clang")
|
||||||
|
message(WARNING "EIGEN_TEST_CUDA_CLANG is set, but CMAKE_CXX_COMPILER does not appear to be clang.")
|
||||||
|
endif()
|
||||||
|
|
||||||
if(EIGEN_TEST_CUDA)
|
if(EIGEN_TEST_CUDA)
|
||||||
|
|
||||||
find_package(CUDA 5.0)
|
find_package(CUDA 5.0)
|
||||||
if(CUDA_FOUND)
|
if(CUDA_FOUND)
|
||||||
|
|
||||||
@ -336,6 +342,9 @@ if(CUDA_FOUND)
|
|||||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||||
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
|
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
|
||||||
endif()
|
endif()
|
||||||
|
if(EIGEN_TEST_CUDA_CLANG)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30")
|
||||||
|
endif()
|
||||||
cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||||
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
||||||
|
|
||||||
|
@ -190,6 +190,10 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
|
|||||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||||
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
|
set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE)
|
||||||
endif()
|
endif()
|
||||||
|
if(EIGEN_TEST_CUDA_CLANG)
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}")
|
||||||
|
endif()
|
||||||
|
|
||||||
set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
|
set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
|
||||||
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
|
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
|
||||||
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
||||||
@ -206,10 +210,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
|
|||||||
ei_add_test(cxx11_tensor_random_cuda)
|
ei_add_test(cxx11_tensor_random_cuda)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Operations other than casting of half floats are only supported starting with arch 5.3
|
ei_add_test(cxx11_tensor_of_float16_cuda)
|
||||||
if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 52)
|
|
||||||
ei_add_test(cxx11_tensor_of_float16_cuda)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
|
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
|
||||||
endif()
|
endif()
|
||||||
|
@ -14,6 +14,9 @@
|
|||||||
|
|
||||||
using std::sqrt;
|
using std::sqrt;
|
||||||
|
|
||||||
|
// tolerance for checking number of iterations
|
||||||
|
#define LM_EVAL_COUNT_TOL 4/3
|
||||||
|
|
||||||
int fcn_chkder(const VectorXd &x, VectorXd &fvec, MatrixXd &fjac, int iflag)
|
int fcn_chkder(const VectorXd &x, VectorXd &fvec, MatrixXd &fjac, int iflag)
|
||||||
{
|
{
|
||||||
/* subroutine fcn for chkder example. */
|
/* subroutine fcn for chkder example. */
|
||||||
@ -1023,7 +1026,8 @@ void testNistLanczos1(void)
|
|||||||
VERIFY_IS_EQUAL(lm.njev, 72);
|
VERIFY_IS_EQUAL(lm.njev, 72);
|
||||||
// check norm^2
|
// check norm^2
|
||||||
std::cout.precision(30);
|
std::cout.precision(30);
|
||||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4290986055242372e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
|
std::cout << lm.fvec.squaredNorm() << "\n";
|
||||||
|
VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
|
||||||
// check x
|
// check x
|
||||||
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
||||||
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
||||||
@ -1044,7 +1048,7 @@ void testNistLanczos1(void)
|
|||||||
VERIFY_IS_EQUAL(lm.nfev, 9);
|
VERIFY_IS_EQUAL(lm.nfev, 9);
|
||||||
VERIFY_IS_EQUAL(lm.njev, 8);
|
VERIFY_IS_EQUAL(lm.njev, 8);
|
||||||
// check norm^2
|
// check norm^2
|
||||||
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430571737783119393e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
|
VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
|
||||||
// check x
|
// check x
|
||||||
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
||||||
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
||||||
@ -1354,8 +1358,12 @@ void testNistMGH17(void)
|
|||||||
|
|
||||||
// check return value
|
// check return value
|
||||||
VERIFY_IS_EQUAL(info, 2);
|
VERIFY_IS_EQUAL(info, 2);
|
||||||
VERIFY(lm.nfev < 650); // 602
|
++g_test_level;
|
||||||
VERIFY(lm.njev < 600); // 545
|
VERIFY_IS_EQUAL(lm.nfev, 602); // 602
|
||||||
|
VERIFY_IS_EQUAL(lm.njev, 545); // 545
|
||||||
|
--g_test_level;
|
||||||
|
VERIFY(lm.nfev < 602 * LM_EVAL_COUNT_TOL);
|
||||||
|
VERIFY(lm.njev < 545 * LM_EVAL_COUNT_TOL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Second try
|
* Second try
|
||||||
|
@ -23,6 +23,9 @@
|
|||||||
|
|
||||||
using std::sqrt;
|
using std::sqrt;
|
||||||
|
|
||||||
|
// tolerance for checking number of iterations
|
||||||
|
#define LM_EVAL_COUNT_TOL 4/3
|
||||||
|
|
||||||
struct lmder_functor : DenseFunctor<double>
|
struct lmder_functor : DenseFunctor<double>
|
||||||
{
|
{
|
||||||
lmder_functor(void): DenseFunctor<double>(3,15) {}
|
lmder_functor(void): DenseFunctor<double>(3,15) {}
|
||||||
@ -631,7 +634,7 @@ void testNistLanczos1(void)
|
|||||||
VERIFY_IS_EQUAL(lm.nfev(), 79);
|
VERIFY_IS_EQUAL(lm.nfev(), 79);
|
||||||
VERIFY_IS_EQUAL(lm.njev(), 72);
|
VERIFY_IS_EQUAL(lm.njev(), 72);
|
||||||
// check norm^2
|
// check norm^2
|
||||||
// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
|
VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
|
||||||
// check x
|
// check x
|
||||||
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
||||||
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
||||||
@ -652,7 +655,7 @@ void testNistLanczos1(void)
|
|||||||
VERIFY_IS_EQUAL(lm.nfev(), 9);
|
VERIFY_IS_EQUAL(lm.nfev(), 9);
|
||||||
VERIFY_IS_EQUAL(lm.njev(), 8);
|
VERIFY_IS_EQUAL(lm.njev(), 8);
|
||||||
// check norm^2
|
// check norm^2
|
||||||
// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats
|
VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
|
||||||
// check x
|
// check x
|
||||||
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
|
||||||
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
VERIFY_IS_APPROX(x[1], 1.0000000001E+00);
|
||||||
@ -789,7 +792,8 @@ void testNistMGH10(void)
|
|||||||
MGH10_functor functor;
|
MGH10_functor functor;
|
||||||
LevenbergMarquardt<MGH10_functor> lm(functor);
|
LevenbergMarquardt<MGH10_functor> lm(functor);
|
||||||
info = lm.minimize(x);
|
info = lm.minimize(x);
|
||||||
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
|
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
|
||||||
|
// was: VERIFY_IS_EQUAL(info, 1);
|
||||||
|
|
||||||
// check norm^2
|
// check norm^2
|
||||||
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
|
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
|
||||||
@ -799,9 +803,13 @@ void testNistMGH10(void)
|
|||||||
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
|
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
|
||||||
|
|
||||||
// check return value
|
// check return value
|
||||||
//VERIFY_IS_EQUAL(info, 1);
|
|
||||||
|
++g_test_level;
|
||||||
VERIFY_IS_EQUAL(lm.nfev(), 284 );
|
VERIFY_IS_EQUAL(lm.nfev(), 284 );
|
||||||
VERIFY_IS_EQUAL(lm.njev(), 249 );
|
VERIFY_IS_EQUAL(lm.njev(), 249 );
|
||||||
|
--g_test_level;
|
||||||
|
VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL);
|
||||||
|
VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Second try
|
* Second try
|
||||||
@ -809,7 +817,10 @@ void testNistMGH10(void)
|
|||||||
x<< 0.02, 4000., 250.;
|
x<< 0.02, 4000., 250.;
|
||||||
// do the computation
|
// do the computation
|
||||||
info = lm.minimize(x);
|
info = lm.minimize(x);
|
||||||
|
++g_test_level;
|
||||||
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
|
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
|
||||||
|
// was: VERIFY_IS_EQUAL(info, 1);
|
||||||
|
--g_test_level;
|
||||||
|
|
||||||
// check norm^2
|
// check norm^2
|
||||||
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
|
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
|
||||||
@ -819,9 +830,12 @@ void testNistMGH10(void)
|
|||||||
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
|
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
|
||||||
|
|
||||||
// check return value
|
// check return value
|
||||||
//VERIFY_IS_EQUAL(info, 1);
|
++g_test_level;
|
||||||
VERIFY_IS_EQUAL(lm.nfev(), 126);
|
VERIFY_IS_EQUAL(lm.nfev(), 126);
|
||||||
VERIFY_IS_EQUAL(lm.njev(), 116);
|
VERIFY_IS_EQUAL(lm.njev(), 116);
|
||||||
|
--g_test_level;
|
||||||
|
VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL);
|
||||||
|
VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -896,8 +910,12 @@ void testNistBoxBOD(void)
|
|||||||
|
|
||||||
// check return value
|
// check return value
|
||||||
VERIFY_IS_EQUAL(info, 1);
|
VERIFY_IS_EQUAL(info, 1);
|
||||||
|
++g_test_level;
|
||||||
VERIFY_IS_EQUAL(lm.nfev(), 16 );
|
VERIFY_IS_EQUAL(lm.nfev(), 16 );
|
||||||
VERIFY_IS_EQUAL(lm.njev(), 15 );
|
VERIFY_IS_EQUAL(lm.njev(), 15 );
|
||||||
|
--g_test_level;
|
||||||
|
VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL);
|
||||||
|
VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL);
|
||||||
// check norm^2
|
// check norm^2
|
||||||
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
|
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
|
||||||
// check x
|
// check x
|
||||||
|
Loading…
x
Reference in New Issue
Block a user