diff --git a/Eigen/Regression b/Eigen/Regression new file mode 100644 index 000000000..8213ff77f --- /dev/null +++ b/Eigen/Regression @@ -0,0 +1,20 @@ +#ifndef EIGEN_REGRESSION_MODULE_H +#define EIGEN_REGRESSION_MODULE_H + +#include "LU" + +namespace Eigen { + +/** \defgroup Regression_Module Regression module + * This module provides linear regression and related features. + * + * \code + * #include + * \endcode + */ + +#include "src/Regression/Regression.h" + +} // namespace Eigen + +#endif // EIGEN_REGRESSION_MODULE_H diff --git a/Eigen/src/CMakeLists.txt b/Eigen/src/CMakeLists.txt index 1d060e85a..4b0e75316 100644 --- a/Eigen/src/CMakeLists.txt +++ b/Eigen/src/CMakeLists.txt @@ -4,4 +4,5 @@ ADD_SUBDIRECTORY(QR) ADD_SUBDIRECTORY(Cholesky) ADD_SUBDIRECTORY(Array) ADD_SUBDIRECTORY(Geometry) +ADD_SUBDIRECTORY(Regression) ADD_SUBDIRECTORY(Sparse) diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 9bdff50b3..c0caf8c06 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -292,10 +292,13 @@ inline typename NumTraits::Scalar>::Real MatrixBase< * \sa norm(), normalize() */ template -inline const typename MatrixBase::ScalarQuotient1ReturnType +inline const typename MatrixBase::EvalType MatrixBase::normalized() const { - return *this / norm(); + typedef typename ei_nested::type Nested; + typedef typename ei_unref::type _Nested; + _Nested n(derived()); + return n / n.norm(); } /** Normalizes the vector, i.e. divides it by its own norm. diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index a1c313108..5b96eedac 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -198,6 +198,15 @@ class Matrix : public MatrixBase class MatrixBase Scalar dot(const MatrixBase& other) const; RealScalar norm2() const; RealScalar norm() const; - const ScalarQuotient1ReturnType normalized() const; + const EvalType normalized() const; void normalize(); Transpose transpose(); diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 40db9a0a7..647cfcbfa 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -37,13 +37,7 @@ #define EIGEN_UNROLLING_LIMIT 100 #endif -#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR -#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit -#else -#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER 0 -#endif - -#define EIGEN_DEFAULT_MATRIX_FLAGS EIGEN_DEFAULT_MATRIX_STORAGE_ORDER +#define EIGEN_DEFAULT_MATRIX_FLAGS 0 /** Define a hint size when dealing with large matrices and L2 cache friendlyness * More precisely, its square value represents the amount of bytes which can be assumed to stay in L2 cache. diff --git a/Eigen/src/LU/LU.h b/Eigen/src/LU/LU.h index 59f1051be..7e2cda921 100644 --- a/Eigen/src/LU/LU.h +++ b/Eigen/src/LU/LU.h @@ -335,7 +335,9 @@ bool LU::solve( return true; } -/** \return the LU decomposition of \c *this. +/** \lu_module + * + * \return the LU decomposition of \c *this. * * \sa class LU */ diff --git a/Eigen/src/Regression/CMakeLists.txt b/Eigen/src/Regression/CMakeLists.txt new file mode 100644 index 000000000..c691f52ac --- /dev/null +++ b/Eigen/src/Regression/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Regression_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Regression_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Regression + ) diff --git a/Eigen/src/Regression/Regression.h b/Eigen/src/Regression/Regression.h new file mode 100644 index 000000000..f8a51cb35 --- /dev/null +++ b/Eigen/src/Regression/Regression.h @@ -0,0 +1,199 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_REGRESSION_H +#define EIGEN_REGRESSION_H + +/** \ingroup Regression_Module + * + * \regression_module + * + * For a set of points, this function tries to express + * one of the coords as a linear (affine) function of the other coords. + * + * This is best explained by an example. This function works in full + * generality, for points in a space of arbitrary dimension, and also over + * the complex numbers, but for this example we will work in dimension 3 + * over the real numbers (doubles). + * + * So let us work with the following set of 5 points given by their + * \f$(x,y,z)\f$ coordinates: + * @code + Vector3d points[5]; + points[0] = Vector3d( 3.02, 6.89, -4.32 ); + points[1] = Vector3d( 2.01, 5.39, -3.79 ); + points[2] = Vector3d( 2.41, 6.01, -4.01 ); + points[3] = Vector3d( 2.09, 5.55, -3.86 ); + points[4] = Vector3d( 2.58, 6.32, -4.10 ); + * @endcode + * Suppose that we want to express the second coordinate (\f$y\f$) as a linear + * expression in \f$x\f$ and \f$z\f$, that is, + * \f[ y=ax+bz+c \f] + * for some constants \f$a,b,c\f$. Thus, we want to find the best possible + * constants \f$a,b,c\f$ so that the plane of equation \f$y=ax+bz+c\f$ fits + * best the five above points. To do that, call this function as follows: + * @code + Vector3d coeffs; // will store the coefficients a, b, c + linearRegression( + 5, + points, + &coeffs, + 1 // the coord to express as a function of + // the other ones. 0 means x, 1 means y, 2 means z. + ); + * @endcode + * Now the vector \a coeffs is approximately + * \f$( 0.495 , -1.927 , -2.906 )\f$. + * Thus, we get \f$a=0.495, b = -1.927, c = -2.906\f$. Let us check for + * instance how near points[0] is from the plane of equation \f$y=ax+bz+c\f$. + * Looking at the coords of points[0], we see that: + * \f[ax+bz+c = 0.495 * 3.02 + (-1.927) * (-4.32) + (-2.906) = 6.91.\f] + * On the other hand, we have \f$y=6.89\f$. We see that the values + * \f$6.91\f$ and \f$6.89\f$ + * are near, so points[0] is very near the plane of equation \f$y=ax+bz+c\f$. + * + * Let's now describe precisely the parameters: + * @param numPoints the number of points + * @param points the array of pointers to the points on which to perform the linear regression + * @param retCoefficients pointer to the vector in which to store the result. + This vector must be of the same type and size as the + data points. The meaning of its coords is as follows. + For brevity, let \f$n=Size\f$, + \f$r_i=retCoefficients[i]\f$, + and \f$f=funcOfOthers\f$. Denote by + \f$x_0,\ldots,x_{n-1}\f$ + the n coordinates in the n-dimensional space. + Then the result equation is: + \f[ x_f = r_0 x_0 + \cdots + r_{f-1}x_{f-1} + + r_{f+1}x_{f+1} + \cdots + r_{n-1}x_{n-1} + r_n. \f] + * @param funcOfOthers Determines which coord to express as a function of the + others. Coords are numbered starting from 0, so that a + value of 0 means \f$x\f$, 1 means \f$y\f$, + 2 means \f$z\f$, ... + * + * \sa fitHyperplane() + */ +template +void linearRegression(int numPoints, + VectorType **points, + VectorType *result, + int funcOfOthers ) +{ + typedef typename VectorType::Scalar Scalar; + EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType) + ei_assert(numPoints >= 1); + int size = points[0]->size(); + ei_assert(funcOfOthers >= 0 && funcOfOthers < size); + result->resize(size); + + Matrix + m(numPoints, size); + if(funcOfOthers>0) + for(int i = 0; i < numPoints; i++) + m.row(i).start(funcOfOthers) = points[i]->start(funcOfOthers); + if(funcOfOthersend(size-funcOfOthers-1); + for(int i = 0; i < numPoints; i++) + m.row(i).coeffRef(size-1) = Scalar(1); + + VectorType v(size); + v.setZero(); + for(int i = 0; i < numPoints; i++) + v += m.row(i).adjoint() * points[i]->coeff(funcOfOthers); + + ei_assert((m.adjoint()*m).lu().solve(v, result)); +} + +/** \ingroup Regression_Module + * + * \regression_module + * + * This function is quite similar to linearRegression(), so we refer to the + * documentation of this function and only list here the differences. + * + * The main difference from linearRegression() is that this function doesn't + * take a \a funcOfOthers argument. Instead, it finds a general equation + * of the form + * \f[ r_0 x_0 + \cdots + r_{n-1}x_{n-1} + r_n = 0, \f] + * where \f$n=Size\f$, \f$r_i=retCoefficients[i]\f$, and we denote by + * \f$x_0,\ldots,x_{n-1}\f$ the n coordinates in the n-dimensional space. + * + * Thus, the vector \a retCoefficients has size \f$n+1\f$, which is another + * difference from linearRegression(). + * + * This functions proceeds by first determining which coord has the smallest variance, + * and then calls linearRegression() to express that coord as a function of the other ones. + * + * \sa linearRegression() + */ +template +void fitHyperplane(int numPoints, + VectorType **points, + BigVectorType *result) +{ + typedef typename VectorType::Scalar Scalar; + EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(BigVectorType) + ei_assert(numPoints >= 1); + int size = points[0]->size(); + ei_assert(size+1 == result->size()); + + // now let's find out which coord varies the least. This is + // approximative. All that matters is that we don't pick a coordinate + // that varies orders of magnitude more than another one. + VectorType mean(size); + Matrix::Real, + VectorType::RowsAtCompileTime, VectorType::ColsAtCompileTime, + VectorType::MaxRowsAtCompileTime, VectorType::MaxColsAtCompileTime + > variance(size); + mean.setZero(); + variance.setZero(); + for(int i = 0; i < numPoints; i++) + mean += *(points[i]); + mean /= numPoints; + for(int j = 0; j < size; j++) + { + for(int i = 0; i < numPoints; i++) + variance.coeffRef(j) += ei_abs2(points[i]->coeff(j) - mean.coeff(j)); + } + + int coord_min_variance; + variance.minCoeff(&coord_min_variance); + + // let's now perform a linear regression with respect to that + // not-too-much-varying coord + VectorType affine(size); + linearRegression(numPoints, points, &affine, coord_min_variance); + + if(coord_min_variance>0) + result->start(coord_min_variance) = affine.start(coord_min_variance); + result->coeffRef(coord_min_variance) = static_cast(-1); + result->end(size-coord_min_variance) = affine.end(size-coord_min_variance); +} + + +#endif // EIGEN_REGRESSION_H diff --git a/Eigen/src/Sparse/CMakeLists.txt b/Eigen/src/Sparse/CMakeLists.txt new file mode 100644 index 000000000..b0ba6e140 --- /dev/null +++ b/Eigen/src/Sparse/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_Sparse_SRCS "*.h") + +INSTALL(FILES + ${Eigen_Sparse_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Sparse + ) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index eed3fe600..2f1a2d023 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -202,6 +202,7 @@ ALIASES = "only_for_vectors=This is only for vectors (either row- "cholesky_module=This is defined in the %Cholesky module. \code #include \endcode" \ "qr_module=This is defined in the %QR module. \code #include \endcode" \ "geometry_module=This is defined in the %Geometry module. \code #include \endcode" \ + "regression_module=This is defined in the %Regression module. \code #include \endcode" \ "addexample=\anchor" \ "label=\bug" # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cb52ba150..407893e3e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -105,5 +105,6 @@ EI_ADD_TEST(inverse) EI_ADD_TEST(qr) EI_ADD_TEST(eigensolver) EI_ADD_TEST(geometry) +EI_ADD_TEST(regression) ENDIF(BUILD_TESTS) diff --git a/test/main.h b/test/main.h index b19c19545..19f453922 100644 --- a/test/main.h +++ b/test/main.h @@ -134,7 +134,6 @@ namespace Eigen #endif // EIGEN_NO_ASSERTION_CHECKING -//#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit #define EIGEN_INTERNAL_DEBUGGING #include diff --git a/test/regression.cpp b/test/regression.cpp new file mode 100644 index 000000000..98d58255e --- /dev/null +++ b/test/regression.cpp @@ -0,0 +1,116 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2008 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" +#include + +template +void makeNoisyCohyperplanarPoints(int numPoints, + VectorType **points, + BigVecType *coeffs, + typename VectorType::Scalar noiseAmplitude ) +{ + typedef typename VectorType::Scalar Scalar; + const int size = points[0]->size(); + // pick a random hyperplane, store the coefficients of its equation + coeffs->resize(size + 1); + for(int j = 0; j < size + 1; j++) + { + do { + coeffs->coeffRef(j) = ei_random(); + } while(ei_abs(coeffs->coeffRef(j)) < 0.5); + } + + // now pick numPoints random points on this hyperplane + for(int i = 0; i < numPoints; i++) + { + VectorType& cur_point = *(points[i]); + do + { + cur_point = VectorType::Random(size)/*.normalized()*/; + // project cur_point onto the hyperplane + Scalar x = - (coeffs->start(size).cwise()*cur_point).sum(); + cur_point *= coeffs->coeff(size) / x; + } while( ei_abs(cur_point.norm()) < 0.5 + || ei_abs(cur_point.norm()) > 2.0 ); + } + + // add some noise to these points + for(int i = 0; i < numPoints; i++ ) + *(points[i]) += noiseAmplitude * VectorType::Random(size); +} + +template +void check_fitHyperplane(int numPoints, + VectorType **points, + BigVecType *coeffs, + typename VectorType::Scalar tolerance) +{ + int size = points[0]->size(); + BigVecType result(size + 1); + fitHyperplane(numPoints, points, &result); + result /= result.coeff(size); + result *= coeffs->coeff(size); + typename VectorType::Scalar error = (result - *coeffs).norm() / coeffs->norm(); + VERIFY(ei_abs(error) < ei_abs(tolerance)); +} + +void test_regression() +{ + for(int i = 0; i < g_repeat; i++) + { + { + Vector2f points2f [1000]; + Vector2f *points2f_ptrs [1000]; + for(int i = 0; i < 1000; i++) points2f_ptrs[i] = &(points2f[i]); + Vector3f coeffs3f; + makeNoisyCohyperplanarPoints(1000, points2f_ptrs, &coeffs3f, 0.01f); + CALL_SUBTEST(check_fitHyperplane(10, points2f_ptrs, &coeffs3f, 0.05f)); + CALL_SUBTEST(check_fitHyperplane(100, points2f_ptrs, &coeffs3f, 0.01f)); + CALL_SUBTEST(check_fitHyperplane(1000, points2f_ptrs, &coeffs3f, 0.002f)); + } + + { + Vector4d points4d [1000]; + Vector4d *points4d_ptrs [1000]; + for(int i = 0; i < 1000; i++) points4d_ptrs[i] = &(points4d[i]); + Matrix coeffs5d; + makeNoisyCohyperplanarPoints(1000, points4d_ptrs, &coeffs5d, 0.01); + CALL_SUBTEST(check_fitHyperplane(10, points4d_ptrs, &coeffs5d, 0.05)); + CALL_SUBTEST(check_fitHyperplane(100, points4d_ptrs, &coeffs5d, 0.01)); + CALL_SUBTEST(check_fitHyperplane(1000, points4d_ptrs, &coeffs5d, 0.002)); + } + + { + VectorXcd *points11cd_ptrs[1000]; + for(int i = 0; i < 1000; i++) points11cd_ptrs[i] = new VectorXcd(11); + VectorXcd *coeffs12cd = new VectorXcd(12); + makeNoisyCohyperplanarPoints(1000, points11cd_ptrs, coeffs12cd, 0.01); + CALL_SUBTEST(check_fitHyperplane(100, points11cd_ptrs, coeffs12cd, 0.025)); + CALL_SUBTEST(check_fitHyperplane(1000, points11cd_ptrs, coeffs12cd, 0.006)); + } + } +}