* Fix CMakeLists.txt issue with SVD

* Fix on-stack memory allocation issues
This commit is contained in:
Gael Guennebaud 2008-08-20 15:58:01 +00:00
parent 752ec27293
commit c501c7a4ef
5 changed files with 27 additions and 23 deletions

View File

@ -1,4 +1,4 @@
SET(Eigen_HEADERS Core CoreDeclarations LU Cholesky QR Geometry Sparse Array) SET(Eigen_HEADERS Core CoreDeclarations LU Cholesky QR Geometry Sparse Array SVD)
IF(BUILD_LIB) IF(BUILD_LIB)
SET(Eigen_SRCS SET(Eigen_SRCS

View File

@ -1,6 +1,7 @@
ADD_SUBDIRECTORY(Core) ADD_SUBDIRECTORY(Core)
ADD_SUBDIRECTORY(LU) ADD_SUBDIRECTORY(LU)
ADD_SUBDIRECTORY(QR) ADD_SUBDIRECTORY(QR)
ADD_SUBDIRECTORY(SVD)
ADD_SUBDIRECTORY(Cholesky) ADD_SUBDIRECTORY(Cholesky)
ADD_SUBDIRECTORY(Array) ADD_SUBDIRECTORY(Array)
ADD_SUBDIRECTORY(Geometry) ADD_SUBDIRECTORY(Geometry)

View File

@ -89,11 +89,10 @@ static void ei_cache_friendly_product(
const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize; const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize;
const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0)); const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
Scalar* EIGEN_RESTRICT block = 0; Scalar* EIGEN_RESTRICT block = 0;
const int allocBlockSize = sizeof(Scalar)*l2BlockRows*size; const int allocBlockSize = l2BlockRows*size;
const bool allocBlockUsingAlloca = EIGEN_USE_ALLOCA && allocBlockSize<=16000000; block = ei_alloc_stack(Scalar, allocBlockSize);
block = (Scalar*)ei_alloca_or_malloc(allocBlockUsingAlloca, allocBlockSize);
Scalar* EIGEN_RESTRICT rhsCopy Scalar* EIGEN_RESTRICT rhsCopy
= (Scalar*)ei_alloca_or_malloc(true, sizeof(Scalar)*l2BlockSizeAligned*l2BlockSizeAligned); = ei_alloc_stack(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
// loops on each L2 cache friendly blocks of the result // loops on each L2 cache friendly blocks of the result
for(int l2i=0; l2i<rows; l2i+=l2BlockRows) for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
@ -334,10 +333,8 @@ static void ei_cache_friendly_product(
} }
} }
if (!allocBlockUsingAlloca) ei_free_stack(block, Scalar, allocBlockSize);
free(block); ei_free_stack(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
if (!EIGEN_USE_ALLOCA)
free(rhsCopy);
} }
#endif // EIGEN_EXTERN_INSTANTIATIONS #endif // EIGEN_EXTERN_INSTANTIATIONS

View File

@ -550,7 +550,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
_res = &res.coeffRef(0); _res = &res.coeffRef(0);
else else
{ {
_res = (Scalar*)ei_alloca_or_malloc(true, sizeof(Scalar)*res.size()); _res = ei_alloc_stack(Scalar,res.size());
Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res; Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
} }
ei_cache_friendly_product_colmajor_times_vector(res.size(), ei_cache_friendly_product_colmajor_times_vector(res.size(),
@ -558,9 +558,10 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
product.rhs(), _res); product.rhs(), _res);
if (!EvalToRes) if (!EvalToRes)
{
res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()); res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
ei_free_stack(_res, Scalar, res.size());
if(!EIGEN_USE_ALLOCA) free(_res); }
} }
}; };
@ -595,7 +596,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
_res = &res.coeffRef(0); _res = &res.coeffRef(0);
else else
{ {
_res = (Scalar*)ei_alloca_or_malloc(true, sizeof(Scalar)*res.size()); _res = ei_alloc_stack(Scalar, res.size());
Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res; Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res;
} }
ei_cache_friendly_product_colmajor_times_vector(res.size(), ei_cache_friendly_product_colmajor_times_vector(res.size(),
@ -603,9 +604,10 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
product.lhs().transpose(), _res); product.lhs().transpose(), _res);
if (!EvalToRes) if (!EvalToRes)
{
res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()); res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
ei_free_stack(_res, Scalar, res.size());
if(!EIGEN_USE_ALLOCA) free(_res); }
} }
}; };
@ -627,13 +629,13 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
_rhs = &product.rhs().const_cast_derived().coeffRef(0); _rhs = &product.rhs().const_cast_derived().coeffRef(0);
else else
{ {
_rhs = (Scalar*)ei_alloca_or_malloc(true, sizeof(Scalar)*product.rhs().size()); _rhs = ei_alloc_stack(Scalar, product.rhs().size());
Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs(); Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs();
} }
ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(), ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
_rhs, product.rhs().size(), res); _rhs, product.rhs().size(), res);
if(!EIGEN_USE_ALLOCA) free(_rhs); if (!UseRhsDirectly) ei_free_stack(_rhs, Scalar, product.rhs().size());
} }
}; };
@ -655,13 +657,13 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
_lhs = &product.lhs().const_cast_derived().coeffRef(0); _lhs = &product.lhs().const_cast_derived().coeffRef(0);
else else
{ {
_lhs = (Scalar*)ei_alloca_or_malloc(true, sizeof(Scalar)*product.lhs().size()); _lhs = ei_alloc_stack(Scalar, product.lhs().size());
Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs(); Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs();
} }
ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(), ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
_lhs, product.lhs().size(), res); _lhs, product.lhs().size(), res);
if(!EIGEN_USE_ALLOCA) free(_lhs); if(!UseLhsDirectly) ei_free_stack(_lhs, Scalar, product.lhs().size());
} }
}; };

View File

@ -150,12 +150,16 @@ friend class Eigen::MatrixBase<Derived>;
#define EIGEN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b) #define EIGEN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
#define EIGEN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b) #define EIGEN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
/* ei_alloc_stack(TYPE,SIZE) allocates sizeof(TYPE)*SIZE bytes on the stack if sizeof(TYPE)*SIZE is smaller
* than EIGEN_STACK_ALLOCATION_LIMIT. Otherwise the memory is allocated using the operator new.
 * Data allocated with ei_alloc_stack must be freed by calling ei_free_stack(PTR,TYPE,SIZE)
*/
#ifdef __linux__ #ifdef __linux__
# define EIGEN_USE_ALLOCA 1 # define ei_alloc_stack(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>16000000) ? new TYPE[SIZE] : (TYPE*)alloca(sizeof(TYPE)*(SIZE)))
# define ei_alloca_or_malloc(condition, size) (condition?alloca(size):malloc(size)) # define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>16000000) delete[] PTR
#else #else
# define EIGEN_USE_ALLOCA 0 # define ei_alloc_stack(TYPE,SIZE) new TYPE[SIZE]
# define ei_alloca_or_malloc(condition, size) malloc(size) # define ei_free_stack(PTR,TYPE,SIZE) delete[] PTR
#endif #endif
#endif // EIGEN_MACROS_H #endif // EIGEN_MACROS_H