* add Regression module, from eigen1, improved, with doc and unit-test.

* fix .normalized() so that Random().normalized() works; since the return
type became complicated to write down i just let it return an actual
vector, perhaps not optimal.
* add Sparse/CMakeLists.txt. I suppose that it was intentional that it
didn't have CMakeLists, but in <=2.0 releases I'll just manually remove
Sparse.
This commit is contained in:
Benoit Jacob 2008-08-11 02:25:40 +00:00
parent 55e8d670ce
commit 17ec407ccd
14 changed files with 369 additions and 12 deletions

20
Eigen/Regression Normal file
View File

@ -0,0 +1,20 @@
#ifndef EIGEN_REGRESSION_MODULE_H
#define EIGEN_REGRESSION_MODULE_H
#include "LU"
namespace Eigen {
/** \defgroup Regression_Module Regression module
* This module provides linear regression and related features.
*
* \code
* #include <Eigen/Regression>
* \endcode
*/
#include "src/Regression/Regression.h"
} // namespace Eigen
#endif // EIGEN_REGRESSION_MODULE_H

View File

@ -4,4 +4,5 @@ ADD_SUBDIRECTORY(QR)
ADD_SUBDIRECTORY(Cholesky)
ADD_SUBDIRECTORY(Array)
ADD_SUBDIRECTORY(Geometry)
ADD_SUBDIRECTORY(Regression)
ADD_SUBDIRECTORY(Sparse)

View File

@ -292,10 +292,13 @@ inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<
* \sa norm(), normalize()
*/
template<typename Derived>
inline const typename MatrixBase<Derived>::ScalarQuotient1ReturnType
inline const typename MatrixBase<Derived>::EvalType
MatrixBase<Derived>::normalized() const
{
return *this / norm();
typedef typename ei_nested<Derived>::type Nested;
typedef typename ei_unref<Nested>::type _Nested;
_Nested n(derived());
return n / n.norm();
}
/** Normalizes the vector, i.e. divides it by its own norm.

View File

@ -198,6 +198,15 @@ class Matrix : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _MaxRows, _MaxCol
m_storage.resize(rows * cols, rows, cols);
}
inline void resize(int size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
if(RowsAtCompileTime == 1)
m_storage.resize(size, 1, size);
else
m_storage.resize(size, size, 1);
}
/** Copies the value of the expression \a other into *this.
*
* *this is resized (if possible) to match the dimensions of \a other.

View File

@ -330,7 +330,7 @@ template<typename Derived> class MatrixBase
Scalar dot(const MatrixBase<OtherDerived>& other) const;
RealScalar norm2() const;
RealScalar norm() const;
const ScalarQuotient1ReturnType normalized() const;
const EvalType normalized() const;
void normalize();
Transpose<Derived> transpose();

View File

@ -37,13 +37,7 @@
#define EIGEN_UNROLLING_LIMIT 100
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit
#else
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER 0
#endif
#define EIGEN_DEFAULT_MATRIX_FLAGS EIGEN_DEFAULT_MATRIX_STORAGE_ORDER
#define EIGEN_DEFAULT_MATRIX_FLAGS 0
/** Define a hint size when dealing with large matrices and L2 cache friendlyness
* More precisely, its square value represents the amount of bytes which can be assumed to stay in L2 cache.

View File

@ -335,7 +335,9 @@ bool LU<MatrixType>::solve(
return true;
}
/** \return the LU decomposition of \c *this.
/** \lu_module
*
* \return the LU decomposition of \c *this.
*
* \sa class LU
*/

View File

@ -0,0 +1,6 @@
FILE(GLOB Eigen_Regression_SRCS "*.h")
INSTALL(FILES
${Eigen_Regression_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Regression
)

View File

@ -0,0 +1,199 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#ifndef EIGEN_REGRESSION_H
#define EIGEN_REGRESSION_H
/** \ingroup Regression_Module
*
* \regression_module
*
* For a set of points, this function tries to express
* one of the coords as a linear (affine) function of the other coords.
*
* This is best explained by an example. This function works in full
* generality, for points in a space of arbitrary dimension, and also over
* the complex numbers, but for this example we will work in dimension 3
* over the real numbers (doubles).
*
* So let us work with the following set of 5 points given by their
* \f$(x,y,z)\f$ coordinates:
* @code
Vector3d points[5];
points[0] = Vector3d( 3.02, 6.89, -4.32 );
points[1] = Vector3d( 2.01, 5.39, -3.79 );
points[2] = Vector3d( 2.41, 6.01, -4.01 );
points[3] = Vector3d( 2.09, 5.55, -3.86 );
points[4] = Vector3d( 2.58, 6.32, -4.10 );
* @endcode
* Suppose that we want to express the second coordinate (\f$y\f$) as a linear
* expression in \f$x\f$ and \f$z\f$, that is,
* \f[ y=ax+bz+c \f]
* for some constants \f$a,b,c\f$. Thus, we want to find the best possible
* constants \f$a,b,c\f$ so that the plane of equation \f$y=ax+bz+c\f$ fits
* best the five above points. To do that, call this function as follows:
* @code
Vector3d coeffs; // will store the coefficients a, b, c
linearRegression(
5,
points,
&coeffs,
1 // the coord to express as a function of
// the other ones. 0 means x, 1 means y, 2 means z.
);
* @endcode
* Now the vector \a coeffs is approximately
* \f$( 0.495 , -1.927 , -2.906 )\f$.
* Thus, we get \f$a=0.495, b = -1.927, c = -2.906\f$. Let us check for
* instance how near points[0] is from the plane of equation \f$y=ax+bz+c\f$.
* Looking at the coords of points[0], we see that:
* \f[ax+bz+c = 0.495 * 3.02 + (-1.927) * (-4.32) + (-2.906) = 6.91.\f]
* On the other hand, we have \f$y=6.89\f$. We see that the values
* \f$6.91\f$ and \f$6.89\f$
* are near, so points[0] is very near the plane of equation \f$y=ax+bz+c\f$.
*
* Let's now describe precisely the parameters:
* @param numPoints the number of points
* @param points the array of pointers to the points on which to perform the linear regression
* @param retCoefficients pointer to the vector in which to store the result.
This vector must be of the same type and size as the
data points. The meaning of its coords is as follows.
For brevity, let \f$n=Size\f$,
\f$r_i=retCoefficients[i]\f$,
and \f$f=funcOfOthers\f$. Denote by
\f$x_0,\ldots,x_{n-1}\f$
the n coordinates in the n-dimensional space.
Then the result equation is:
\f[ x_f = r_0 x_0 + \cdots + r_{f-1}x_{f-1}
+ r_{f+1}x_{f+1} + \cdots + r_{n-1}x_{n-1} + r_n. \f]
* @param funcOfOthers Determines which coord to express as a function of the
others. Coords are numbered starting from 0, so that a
value of 0 means \f$x\f$, 1 means \f$y\f$,
2 means \f$z\f$, ...
*
* \sa fitHyperplane()
*/
template<typename VectorType>
void linearRegression(int numPoints,
VectorType **points,
VectorType *result,
int funcOfOthers )
{
typedef typename VectorType::Scalar Scalar;
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType)
ei_assert(numPoints >= 1);
int size = points[0]->size();
ei_assert(funcOfOthers >= 0 && funcOfOthers < size);
result->resize(size);
Matrix<Scalar, Dynamic, VectorType::SizeAtCompileTime,
Dynamic, VectorType::MaxSizeAtCompileTime, RowMajorBit>
m(numPoints, size);
if(funcOfOthers>0)
for(int i = 0; i < numPoints; i++)
m.row(i).start(funcOfOthers) = points[i]->start(funcOfOthers);
if(funcOfOthers<size-1)
for(int i = 0; i < numPoints; i++)
m.row(i).block(funcOfOthers, size-funcOfOthers-1)
= points[i]->end(size-funcOfOthers-1);
for(int i = 0; i < numPoints; i++)
m.row(i).coeffRef(size-1) = Scalar(1);
VectorType v(size);
v.setZero();
for(int i = 0; i < numPoints; i++)
v += m.row(i).adjoint() * points[i]->coeff(funcOfOthers);
ei_assert((m.adjoint()*m).lu().solve(v, result));
}
/** \ingroup Regression_Module
*
* \regression_module
*
* This function is quite similar to linearRegression(), so we refer to the
* documentation of this function and only list here the differences.
*
* The main difference from linearRegression() is that this function doesn't
* take a \a funcOfOthers argument. Instead, it finds a general equation
* of the form
* \f[ r_0 x_0 + \cdots + r_{n-1}x_{n-1} + r_n = 0, \f]
* where \f$n=Size\f$, \f$r_i=retCoefficients[i]\f$, and we denote by
* \f$x_0,\ldots,x_{n-1}\f$ the n coordinates in the n-dimensional space.
*
* Thus, the vector \a retCoefficients has size \f$n+1\f$, which is another
* difference from linearRegression().
*
* This functions proceeds by first determining which coord has the smallest variance,
* and then calls linearRegression() to express that coord as a function of the other ones.
*
* \sa linearRegression()
*/
template<typename VectorType, typename BigVectorType>
void fitHyperplane(int numPoints,
VectorType **points,
BigVectorType *result)
{
typedef typename VectorType::Scalar Scalar;
EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType)
EIGEN_STATIC_ASSERT_VECTOR_ONLY(BigVectorType)
ei_assert(numPoints >= 1);
int size = points[0]->size();
ei_assert(size+1 == result->size());
// now let's find out which coord varies the least. This is
// approximative. All that matters is that we don't pick a coordinate
// that varies orders of magnitude more than another one.
VectorType mean(size);
Matrix<typename NumTraits<Scalar>::Real,
VectorType::RowsAtCompileTime, VectorType::ColsAtCompileTime,
VectorType::MaxRowsAtCompileTime, VectorType::MaxColsAtCompileTime
> variance(size);
mean.setZero();
variance.setZero();
for(int i = 0; i < numPoints; i++)
mean += *(points[i]);
mean /= numPoints;
for(int j = 0; j < size; j++)
{
for(int i = 0; i < numPoints; i++)
variance.coeffRef(j) += ei_abs2(points[i]->coeff(j) - mean.coeff(j));
}
int coord_min_variance;
variance.minCoeff(&coord_min_variance);
// let's now perform a linear regression with respect to that
// not-too-much-varying coord
VectorType affine(size);
linearRegression(numPoints, points, &affine, coord_min_variance);
if(coord_min_variance>0)
result->start(coord_min_variance) = affine.start(coord_min_variance);
result->coeffRef(coord_min_variance) = static_cast<Scalar>(-1);
result->end(size-coord_min_variance) = affine.end(size-coord_min_variance);
}
#endif // EIGEN_REGRESSION_H

View File

@ -0,0 +1,6 @@
FILE(GLOB Eigen_Sparse_SRCS "*.h")
INSTALL(FILES
${Eigen_Sparse_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Sparse
)

View File

@ -202,6 +202,7 @@ ALIASES = "only_for_vectors=This is only for vectors (either row-
"cholesky_module=This is defined in the %Cholesky module. \code #include <Eigen/Cholesky> \endcode" \
"qr_module=This is defined in the %QR module. \code #include <Eigen/QR> \endcode" \
"geometry_module=This is defined in the %Geometry module. \code #include <Eigen/Geometry> \endcode" \
"regression_module=This is defined in the %Regression module. \code #include <Eigen/Regression> \endcode" \
"addexample=\anchor" \
"label=\bug"
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C

View File

@ -105,5 +105,6 @@ EI_ADD_TEST(inverse)
EI_ADD_TEST(qr)
EI_ADD_TEST(eigensolver)
EI_ADD_TEST(geometry)
EI_ADD_TEST(regression)
ENDIF(BUILD_TESTS)

View File

@ -134,7 +134,6 @@ namespace Eigen
#endif // EIGEN_NO_ASSERTION_CHECKING
//#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER RowMajorBit
#define EIGEN_INTERNAL_DEBUGGING
#include <Eigen/Core>

116
test/regression.cpp Normal file
View File

@ -0,0 +1,116 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra. Eigen itself is part of the KDE project.
//
// Copyright (C) 2008 Benoit Jacob <jacob@math.jussieu.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.
#include "main.h"
#include <Eigen/Regression>
template<typename VectorType,
typename BigVecType>
void makeNoisyCohyperplanarPoints(int numPoints,
VectorType **points,
BigVecType *coeffs,
typename VectorType::Scalar noiseAmplitude )
{
typedef typename VectorType::Scalar Scalar;
const int size = points[0]->size();
// pick a random hyperplane, store the coefficients of its equation
coeffs->resize(size + 1);
for(int j = 0; j < size + 1; j++)
{
do {
coeffs->coeffRef(j) = ei_random<Scalar>();
} while(ei_abs(coeffs->coeffRef(j)) < 0.5);
}
// now pick numPoints random points on this hyperplane
for(int i = 0; i < numPoints; i++)
{
VectorType& cur_point = *(points[i]);
do
{
cur_point = VectorType::Random(size)/*.normalized()*/;
// project cur_point onto the hyperplane
Scalar x = - (coeffs->start(size).cwise()*cur_point).sum();
cur_point *= coeffs->coeff(size) / x;
} while( ei_abs(cur_point.norm()) < 0.5
|| ei_abs(cur_point.norm()) > 2.0 );
}
// add some noise to these points
for(int i = 0; i < numPoints; i++ )
*(points[i]) += noiseAmplitude * VectorType::Random(size);
}
template<typename VectorType,
typename BigVecType>
void check_fitHyperplane(int numPoints,
VectorType **points,
BigVecType *coeffs,
typename VectorType::Scalar tolerance)
{
int size = points[0]->size();
BigVecType result(size + 1);
fitHyperplane(numPoints, points, &result);
result /= result.coeff(size);
result *= coeffs->coeff(size);
typename VectorType::Scalar error = (result - *coeffs).norm() / coeffs->norm();
VERIFY(ei_abs(error) < ei_abs(tolerance));
}
void test_regression()
{
for(int i = 0; i < g_repeat; i++)
{
{
Vector2f points2f [1000];
Vector2f *points2f_ptrs [1000];
for(int i = 0; i < 1000; i++) points2f_ptrs[i] = &(points2f[i]);
Vector3f coeffs3f;
makeNoisyCohyperplanarPoints(1000, points2f_ptrs, &coeffs3f, 0.01f);
CALL_SUBTEST(check_fitHyperplane(10, points2f_ptrs, &coeffs3f, 0.05f));
CALL_SUBTEST(check_fitHyperplane(100, points2f_ptrs, &coeffs3f, 0.01f));
CALL_SUBTEST(check_fitHyperplane(1000, points2f_ptrs, &coeffs3f, 0.002f));
}
{
Vector4d points4d [1000];
Vector4d *points4d_ptrs [1000];
for(int i = 0; i < 1000; i++) points4d_ptrs[i] = &(points4d[i]);
Matrix<double,5,1> coeffs5d;
makeNoisyCohyperplanarPoints(1000, points4d_ptrs, &coeffs5d, 0.01);
CALL_SUBTEST(check_fitHyperplane(10, points4d_ptrs, &coeffs5d, 0.05));
CALL_SUBTEST(check_fitHyperplane(100, points4d_ptrs, &coeffs5d, 0.01));
CALL_SUBTEST(check_fitHyperplane(1000, points4d_ptrs, &coeffs5d, 0.002));
}
{
VectorXcd *points11cd_ptrs[1000];
for(int i = 0; i < 1000; i++) points11cd_ptrs[i] = new VectorXcd(11);
VectorXcd *coeffs12cd = new VectorXcd(12);
makeNoisyCohyperplanarPoints(1000, points11cd_ptrs, coeffs12cd, 0.01);
CALL_SUBTEST(check_fitHyperplane(100, points11cd_ptrs, coeffs12cd, 0.025));
CALL_SUBTEST(check_fitHyperplane(1000, points11cd_ptrs, coeffs12cd, 0.006));
}
}
}