Fix memory leak when a custom scalar throws an exception

(transplanted from 290205dfc049abc5a92c1191740b30fa91130ade)
Gael Guennebaud 2011-03-19 01:06:50 +01:00
parent 84c8b6d5c5
commit c3342b0bb4
11 changed files with 120 additions and 193 deletions
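
For context, a minimal standalone sketch (not part of the commit; ThrowingScalar and both helper functions are made up for illustration) of the failure mode being fixed: with the old, manually paired ei_aligned_stack_new / ei_aligned_stack_delete calls, an exception thrown by a custom scalar type between the two calls skips the release and leaks the temporary buffer, whereas a scope guard releases it during stack unwinding.

#include <stdexcept>

// Hypothetical custom scalar whose copy assignment can throw; the throwing
// condition is arbitrary and only serves the example.
struct ThrowingScalar
{
  double v;
  ThrowingScalar& operator=(const ThrowingScalar& other)
  {
    if (other.v != other.v)                 // refuse NaN, just to have a failure path
      throw std::runtime_error("bad scalar");
    v = other.v;
    return *this;
  }
};

// Old pattern: paired allocate/release calls, analogous to
// ei_aligned_stack_new / ei_aligned_stack_delete.
void copy_old_style(const ThrowingScalar* src, int n)
{
  ThrowingScalar* buf = new ThrowingScalar[n];
  for (int i = 0; i < n; ++i)
    buf[i] = src[i];        // if this throws, the release below is skipped...
  delete[] buf;             // ...and the buffer leaks
}

// RAII pattern the commit switches to: the guard's destructor runs during
// stack unwinding, so the buffer is released even when the copy throws.
void copy_raii_style(const ThrowingScalar* src, int n)
{
  struct Guard
  {
    ThrowingScalar* p;
    explicit Guard(ThrowingScalar* q) : p(q) {}
    ~Guard() { delete[] p; }
  };
  ThrowingScalar* buf = new ThrowingScalar[n];
  Guard guard(buf);
  for (int i = 0; i < n; ++i)
    buf[i] = src[i];        // may throw; ~Guard still frees buf
}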

View File

@@ -416,23 +416,15 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
     RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
-    ResScalar* actualDestPtr;
-    bool freeDestPtr = false;
-    if (evalToDest)
-    {
-      actualDestPtr = &dest.coeffRef(0);
-    }
-    else
+    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                  evalToDest ? dest.data() : static_dest.data());
+    if(!evalToDest)
     {
       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualDestPtr = static_dest.data())==0)
-      {
-        freeDestPtr = true;
-        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
-      }
      if(!alphaIsCompatible)
      {
        MappedDest(actualDestPtr, dest.size()).setZero();
@@ -456,7 +448,6 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
      else
        dest = MappedDest(actualDestPtr, dest.size());
-      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
    }
  }
 };
@@ -490,23 +481,15 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
-    RhsScalar* actualRhsPtr;
-    bool freeRhsPtr = false;
-    if (DirectlyUseRhs)
-    {
-      actualRhsPtr = const_cast<RhsScalar*>(&actualRhs.coeffRef(0));
-    }
-    else
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+                                                  DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualRhsPtr = static_rhs.data())==0)
-      {
-        freeRhsPtr = true;
-        actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
-      }
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }
@@ -517,8 +500,6 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
      actualRhsPtr, 1,
      &dest.coeffRef(0,0), dest.innerStride(),
      actualAlpha);
-    if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
  }
 };
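
The same replacement pattern recurs in the remaining files: the old code picked one of several destinations (the result itself, a small fixed-size static buffer, or a freshly allocated one), remembered whether it had to be freed, and released it by hand at the end. The new macro folds that choice into its fourth argument: when it is non-null the declared name is simply an alias and nothing is allocated or released, otherwise a guarded buffer is created and cleaned up automatically. A compilable standalone sketch of that idiom, with hypothetical names (owned_or_aliased_guard, compute) and none of Eigen's alignment or element-construction handling:

#include <cstdlib>

// Guard that owns the buffer only when one was actually allocated; freeing a
// null pointer is a no-op, so the aliased case costs nothing.
struct owned_or_aliased_guard
{
  double* owned;                        // 0 when "work" only aliases caller storage
  explicit owned_or_aliased_guard(double* p) : owned(p) {}
  ~owned_or_aliased_guard() { std::free(owned); }
};

void compute(double* caller_buffer, std::size_t n)
{
  double* work = caller_buffer
               ? caller_buffer          // BUFFER != 0: alias it, allocate nothing
               : static_cast<double*>(std::malloc(sizeof(double) * n));
  owned_or_aliased_guard guard(caller_buffer ? 0 : work);
  // ... fill and use work[0..n-1]; any owned memory is released automatically,
  // even if the code in between throws ...
}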

View File

@@ -74,26 +74,19 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
     // FIXME find a way to allow an inner stride if packet_traits<Scalar>::size==1
     bool useRhsDirectly = Rhs::InnerStrideAtCompileTime==1 || rhs.innerStride()==1;
-    RhsScalar* actualRhs;
-    if(useRhsDirectly)
-    {
-      actualRhs = &rhs.coeffRef(0);
-    }
-    else
-    {
-      actualRhs = ei_aligned_stack_new(RhsScalar,rhs.size());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhs,rhs.size(),
+                                                  (useRhsDirectly ? rhs.data() : 0));
+    if(!useRhsDirectly)
       MappedRhs(actualRhs,rhs.size()) = rhs;
-    }
     triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate,
                             (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
       ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
     if(!useRhsDirectly)
-    {
       rhs = MappedRhs(actualRhs, rhs.size());
-      ei_aligned_stack_delete(RhsScalar, actualRhs, rhs.size());
-    }
   }
 };

View File

@@ -94,8 +94,9 @@ static void run(Index rows, Index cols, Index depth,
     std::size_t sizeA = kc*mc;
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, sizeA);
-    RhsScalar* w = ei_aligned_stack_new(RhsScalar, sizeW);
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, 0);
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, w, sizeW, 0);
     RhsScalar* blockB = blocking.blockB();
     eigen_internal_assert(blockB!=0);
@@ -167,9 +168,10 @@ static void run(Index rows, Index cols, Index depth,
     std::size_t sizeA = kc*mc;
     std::size_t sizeB = kc*cols;
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
-    LhsScalar *blockA = blocking.blockA()==0 ? ei_aligned_stack_new(LhsScalar, sizeA) : blocking.blockA();
-    RhsScalar *blockB = blocking.blockB()==0 ? ei_aligned_stack_new(RhsScalar, sizeB) : blocking.blockB();
-    RhsScalar *blockW = blocking.blockW()==0 ? ei_aligned_stack_new(RhsScalar, sizeW) : blocking.blockW();
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockW, sizeW, blocking.blockW());
     // For each horizontal panel of the rhs, and corresponding panel of the lhs...
     // (==GEMM_VAR1)
@@ -200,10 +202,6 @@ static void run(Index rows, Index cols, Index depth,
        }
      }
-    if(blocking.blockA()==0) ei_aligned_stack_delete(LhsScalar, blockA, sizeA);
-    if(blocking.blockB()==0) ei_aligned_stack_delete(RhsScalar, blockB, sizeB);
-    if(blocking.blockW()==0) ei_aligned_stack_delete(RhsScalar, blockW, sizeW);
   }
 }

View File

@@ -83,10 +83,10 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
     if(mc > Traits::nr)
       mc = (mc/Traits::nr)*Traits::nr;
-    LhsScalar* blockA = ei_aligned_stack_new(LhsScalar, kc*mc);
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
     std::size_t sizeB = sizeW + kc*size;
-    RhsScalar* allocatedBlockB = ei_aligned_stack_new(RhsScalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
     RhsScalar* blockB = allocatedBlockB + sizeW;
     gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -125,8 +125,6 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
        }
      }
    }
-    ei_aligned_stack_delete(LhsScalar, blockA, kc*mc);
-    ei_aligned_stack_delete(RhsScalar, allocatedBlockB, sizeB);
   }
 };

View File

@@ -263,10 +263,10 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
     // kc must smaller than mc
     kc = std::min(kc,mc);
-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
     std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
     Scalar* blockB = allocatedBlockB + sizeW;
     gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -313,9 +313,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
   }
 };
@@ -343,11 +340,10 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
     Index mc = rows; // cache block size along the M direction
     Index nc = cols; // cache block size along the N direction
     computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
     std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
     Scalar* blockB = allocatedBlockB + sizeW;
     gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
@@ -369,9 +365,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
   }
 };

View File

@@ -62,14 +62,12 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
   // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
   // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
   // this is because we need to extract packets
-  const Scalar* EIGEN_RESTRICT rhs = _rhs;
+  ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);
   if (rhsIncr!=1)
   {
-    Scalar* r = ei_aligned_stack_new(Scalar, size);
     const Scalar* it = _rhs;
     for (Index i=0; i<size; ++i, it+=rhsIncr)
-      r[i] = *it;
-    rhs = r;
+      rhs[i] = *it;
   }
   Index bound = std::max(Index(0),size-8) & 0xfffffffe;
@@ -160,9 +158,6 @@ static EIGEN_DONT_INLINE void product_selfadjoint_vector(
     }
     res[j] += alpha * t2;
   }
-  if(rhsIncr!=1)
-    ei_aligned_stack_delete(Scalar, const_cast<Scalar*>(rhs), size);
 }
 } // end namespace internal
@@ -211,40 +206,28 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
     internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
     internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
+    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                  EvalToDest ? dest.data() : static_dest.data());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
+                                                  UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
-    bool freeDestPtr = false;
-    ResScalar* actualDestPtr;
-    if(EvalToDest)
-      actualDestPtr = dest.data();
-    else
+    if(!EvalToDest)
     {
       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualDestPtr=static_dest.data())==0)
-      {
-        freeDestPtr = true;
-        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
-      }
      MappedDest(actualDestPtr, dest.size()) = dest;
    }
-    bool freeRhsPtr = false;
-    RhsScalar* actualRhsPtr;
-    if(UseRhs)
-      actualRhsPtr = const_cast<RhsScalar*>(rhs.data());
-    else
+    if(!UseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = rhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualRhsPtr=static_rhs.data())==0)
-      {
-        freeRhsPtr = true;
-        actualRhsPtr = ei_aligned_stack_new(RhsScalar,rhs.size());
-      }
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
    }
@@ -259,11 +242,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
      );
    if(!EvalToDest)
-    {
      dest = MappedDest(actualDestPtr, dest.size());
-      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
-    }
-    if(freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, rhs.size());
   }
 };

View File

@@ -81,27 +81,17 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
       UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1
     };
     internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;
-    bool freeOtherPtr = false;
-    Scalar* actualOtherPtr;
-    if(UseOtherDirectly)
-      actualOtherPtr = const_cast<Scalar*>(actualOther.data());
-    else
-    {
-      if((actualOtherPtr=static_other.data())==0)
-      {
-        freeOtherPtr = true;
-        actualOtherPtr = ei_aligned_stack_new(Scalar,other.size());
-      }
+    ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
+                                                  (UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
+    if(!UseOtherDirectly)
       Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
-    }
     selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
                              OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
                              (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
       ::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
-    if((!UseOtherDirectly) && freeOtherPtr) ei_aligned_stack_delete(Scalar, actualOtherPtr, other.size());
   }
 };

View File

@@ -118,11 +118,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
     Index mc = rows; // cache block size along the M direction
     Index nc = cols; // cache block size along the N direction
     computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
     std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
     Scalar* blockB = allocatedBlockB + sizeW;
     Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
@@ -208,10 +207,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
        }
      }
    }
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
-    // delete[] allocatedBlockB;
   }
 };
@@ -246,10 +241,10 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
     Index nc = cols; // cache block size along the N direction
     computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
     std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar,sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
     Scalar* blockB = allocatedBlockB + sizeW;
     Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
@@ -347,9 +342,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
                    -1, -1, 0, 0, allocatedBlockB);
      }
    }
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
   }
 };

View File

@@ -41,9 +41,6 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
   static EIGEN_DONT_INLINE void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
                                     const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
   {
-    EIGEN_UNUSED_VARIABLE(resIncr);
-    eigen_assert(resIncr==1);
     static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
     typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
@@ -95,9 +92,6 @@ struct product_triangular_matrix_vector<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
   static void run(Index rows, Index cols, const LhsScalar* _lhs, Index lhsStride,
                   const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
   {
-    eigen_assert(rhsIncr==1);
-    EIGEN_UNUSED_VARIABLE(rhsIncr);
     static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
     typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
@@ -185,7 +179,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
   template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
   {
     eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
     typedef TriangularProduct<(Mode & UnitDiag) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
     Transpose<Dest> dstT(dst);
     internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
@@ -235,23 +229,15 @@ template<> struct trmv_selector<ColMajor>
     RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
-    ResScalar* actualDestPtr;
-    bool freeDestPtr = false;
-    if (evalToDest)
-    {
-      actualDestPtr = dest.data();
-    }
-    else
+    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                  evalToDest ? dest.data() : static_dest.data());
+    if(!evalToDest)
     {
       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = dest.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualDestPtr = static_dest.data())==0)
-      {
-        freeDestPtr = true;
-        actualDestPtr = ei_aligned_stack_new(ResScalar,dest.size());
-      }
      if(!alphaIsCompatible)
      {
        MappedDest(actualDestPtr, dest.size()).setZero();
@@ -277,7 +263,6 @@ template<> struct trmv_selector<ColMajor>
        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
      else
        dest = MappedDest(actualDestPtr, dest.size());
-      if(freeDestPtr) ei_aligned_stack_delete(ResScalar, actualDestPtr, dest.size());
    }
  }
 };
@@ -310,23 +295,15 @@ template<> struct trmv_selector<RowMajor>
    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
-    RhsScalar* actualRhsPtr;
-    bool freeRhsPtr = false;
-    if (DirectlyUseRhs)
-    {
-      actualRhsPtr = const_cast<RhsScalar*>(actualRhs.data());
-    }
-    else
+    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
+                                                  DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      int size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      if((actualRhsPtr = static_rhs.data())==0)
-      {
-        freeRhsPtr = true;
-        actualRhsPtr = ei_aligned_stack_new(RhsScalar, actualRhs.size());
-      }
      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }
@@ -340,8 +317,6 @@ template<> struct trmv_selector<RowMajor>
      actualRhsPtr,1,
      dest.data(),dest.innerStride(),
      actualAlpha);
-    if((!DirectlyUseRhs) && freeRhsPtr) ei_aligned_stack_delete(RhsScalar, actualRhsPtr, prod.rhs().size());
  }
 };

View File

@@ -70,10 +70,10 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
     Index nc = cols; // cache block size along the N direction
     computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
     std::size_t sizeB = sizeW + kc*cols;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
     Scalar* blockB = allocatedBlockB + sizeW;
     conj_if<Conjugate> conj;
@@ -174,9 +174,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
        }
      }
    }
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
   }
 };
@@ -209,10 +206,10 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
     Index nc = rows; // cache block size along the N direction
     computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
-    Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
     std::size_t sizeB = sizeW + kc*size;
-    Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
+    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
     Scalar* blockB = allocatedBlockB + sizeW;
     conj_if<Conjugate> conj;
@@ -314,9 +311,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
                    -1, -1, 0, 0, allocatedBlockB);
      }
    }
-    ei_aligned_stack_delete(Scalar, blockA, kc*mc);
-    ei_aligned_stack_delete(Scalar, allocatedBlockB, sizeB);
   }
 };

View File

@@ -468,36 +468,70 @@ inline static Index first_aligned(const Scalar* array, Index size)
 *** Implementation of runtime stack allocation (falling back to malloc)    ***
 *****************************************************************************/
-/** \internal
-  * Allocates an aligned buffer of SIZE bytes on the stack if SIZE is smaller than
-  * EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
-  * (currently, this is Linux only). Otherwise the memory is allocated on the heap.
-  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling
-  * ei_aligned_stack_free(PTR,SIZE).
-  * \code
-  * float * data = ei_aligned_stack_alloc(float,array.size());
-  * // ...
-  * ei_aligned_stack_free(data,float,array.size());
-  * \endcode
-  */
-#if (defined __linux__)
-  #define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \
-                                       ? alloca(SIZE) \
-                                       : Eigen::internal::aligned_malloc(SIZE)
-  #define ei_aligned_stack_free(PTR,SIZE) if(SIZE>EIGEN_STACK_ALLOCATION_LIMIT) Eigen::internal::aligned_free(PTR)
-#elif defined(_MSC_VER)
-  #define ei_aligned_stack_alloc(SIZE) (SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) \
-                                       ? _alloca(SIZE) \
-                                       : Eigen::internal::aligned_malloc(SIZE)
-  #define ei_aligned_stack_free(PTR,SIZE) if(SIZE>EIGEN_STACK_ALLOCATION_LIMIT) Eigen::internal::aligned_free(PTR)
-#else
-  #define ei_aligned_stack_alloc(SIZE) Eigen::internal::aligned_malloc(SIZE)
-  #define ei_aligned_stack_free(PTR,SIZE) Eigen::internal::aligned_free(PTR)
+// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
+// to the appropriate stack allocation function
+#ifndef EIGEN_ALLOCA
+  #if (defined __linux__)
+    #define EIGEN_ALLOCA alloca
+  #elif defined(_MSC_VER)
+    #define EIGEN_ALLOCA _alloca
+  #endif
 #endif
-#define ei_aligned_stack_new(TYPE,SIZE) Eigen::internal::construct_elements_of_array(reinterpret_cast<TYPE*>(ei_aligned_stack_alloc(sizeof(TYPE)*SIZE)), SIZE)
-#define ei_aligned_stack_delete(TYPE,PTR,SIZE) do {Eigen::internal::destruct_elements_of_array<TYPE>(PTR, SIZE); \
-                                                   ei_aligned_stack_free(PTR,sizeof(TYPE)*SIZE);} while(0)
+
+namespace internal {
+
+template<typename T> class stack_memory_destructor
+{
+  public:
+    stack_memory_destructor(T* ptr,size_t size) : m_ptr(ptr), m_size(size) {}
+    ~stack_memory_destructor()
+    {
+      Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
+      #ifdef EIGEN_ALLOCA
+      if(sizeof(T)*m_size>EIGEN_STACK_ALLOCATION_LIMIT)
+      #endif
+        Eigen::internal::aligned_free(m_ptr);
+    }
+  protected:
+    T* m_ptr;
+    size_t m_size;
+};
+
+}
+
+/** \internal
+  * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
+  * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
+  * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
+  * The allocated buffer is automatically deleted when exiting the scope of this declaration.
+  * If BUFFER is non nul, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
+  * Here is an example:
+  * \code
+  * {
+  *   ei_declare_aligned_stack_constructed_variable(float,data,size,0);
+  *   // use data[0] to data[size-1]
+  * }
+  * \endcode
+  * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
+  */
+#ifdef EIGEN_ALLOCA
+
+  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
+               : reinterpret_cast<TYPE*>( \
+                  (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? alloca(sizeof(TYPE)*SIZE) \
+                                                                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
+    if((BUFFER)==0) Eigen::internal::construct_elements_of_array(NAME, SIZE); \
+    Eigen::internal::stack_memory_destructor<TYPE> EIGEN_CAT(stack_memory_destructor,__LINE__)((BUFFER)==0 ? NAME : 0,SIZE)
+
+#else
+
+  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
+    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
+    if((BUFFER)==0) Eigen::internal::construct_elements_of_array(NAME, SIZE); \
+    Eigen::internal::stack_memory_destructor<TYPE> EIGEN_CAT(stack_memory_destructor,__LINE__)((BUFFER)==0 ? NAME : 0,SIZE)
+
+#endif
+
 /*****************************************************************************
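
To close, the new helper keeps the old stack-or-heap decision: requests at or below EIGEN_STACK_ALLOCATION_LIMIT come from alloca and are reclaimed automatically when the function returns, while larger ones come from aligned_malloc and are released by the guard's destructor (the #ifdef EIGEN_ALLOCA test inside stack_memory_destructor above). A standalone, compilable sketch of just that decision, Linux-only here because of alloca.h, with made-up names (SKETCH_STACK_LIMIT, heap_release_guard, process) and without the alignment and element construction/destruction the real code performs:

#include <cstdlib>
#include <alloca.h>   // Linux; MSVC provides _alloca in <malloc.h> instead

#define SKETCH_STACK_LIMIT 1024   // stand-in for EIGEN_STACK_ALLOCATION_LIMIT

// Releases heap storage at end of scope; pass 0 for the alloca case, since
// stack memory is reclaimed automatically when the function returns.
struct heap_release_guard
{
  void* p;
  explicit heap_release_guard(void* q) : p(q) {}
  ~heap_release_guard() { std::free(p); }
};

void process(std::size_t n)
{
  const std::size_t bytes = sizeof(double) * n;
  const bool on_stack = bytes <= SKETCH_STACK_LIMIT;
  // alloca must be issued in the function that uses the buffer, which is why
  // Eigen wraps the whole pattern in a macro rather than in a helper function.
  double* buf = static_cast<double*>(on_stack ? alloca(bytes) : std::malloc(bytes));
  heap_release_guard guard(on_stack ? 0 : buf);
  // ... use buf[0..n-1]; the heap case is freed even if an exception is thrown ...
}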