Mirror of https://gitlab.com/libeigen/eigen.git
commit 55aeb1f83a (parent b32b186c14)

Optimizations:
* faster matrix-matrix and matrix-vector products (especially for the unaligned cases)
* faster tridiagonalization (now built on our own matrix-vector product implementation)

Others:
* fix the Flags of Map
* split test_product into two smaller tests (product_small and product_large)
@@ -86,13 +86,15 @@ static void ei_cache_friendly_product(
   const int l2BlockRows = MaxL2BlockSize > rows ? rows : MaxL2BlockSize;
   const int l2BlockCols = MaxL2BlockSize > cols ? cols : MaxL2BlockSize;
   const int l2BlockSize = MaxL2BlockSize > size ? size : MaxL2BlockSize;
+  const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize;
+  const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
   Scalar* __restrict__ block = 0;
   const int allocBlockSize = sizeof(Scalar)*l2BlockRows*size;
   if (allocBlockSize>16000000)
     block = (Scalar*)malloc(allocBlockSize);
   else
     block = (Scalar*)alloca(allocBlockSize);
-  Scalar* __restrict__ rhsCopy = (Scalar*)alloca(sizeof(Scalar)*l2BlockSize);
+  Scalar* __restrict__ rhsCopy = (Scalar*)alloca(sizeof(Scalar)*l2BlockSizeAligned*l2BlockSizeAligned);

   // loops on each L2 cache friendly blocks of the result
   for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
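The new needRhsCopy flag decides once per product whether the rhs columns can ever be packet-loaded as-is: if the column stride is not a multiple of the packet size, or the base pointer is not 16-byte aligned, some column start will always be misaligned. A minimal stand-alone sketch of that test, with hypothetical names, assuming 16-byte SSE packets as in the patch:

    #include <cstddef>

    // Sketch: true when the rhs columns cannot all be loaded with aligned
    // packet instructions, so the kernel must first copy them into aligned
    // scratch memory. PacketSize = number of Scalars per SIMD register.
    template <typename Scalar, int PacketSize>
    bool needs_rhs_copy(const Scalar* rhs, int rhsStride)
    {
      return (PacketSize > 1)
          && ((rhsStride % PacketSize != 0) // column starts drift in alignment
           || (reinterpret_cast<std::size_t>(rhs) % 16 != 0)); // base misaligned
    }

The scratch buffer of l2BlockSizeAligned*l2BlockSizeAligned scalars is sized so that each copied column starts at its own packet-aligned offset, which is exactly what the ei_internal_assert in the next hunk checks.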
@@ -113,8 +115,8 @@ static void ei_cache_friendly_product(

     for (int i = l2i; i<l2blockRowEndBW/*PlusOne*/; i+=MaxBlockRows)
     {
-      // TODO merge the if l2blockRemainingRows
+      // TODO merge the "if l2blockRemainingRows" using something like:
       // const int blockRows = std::min(i+MaxBlockRows, rows) - i;

       for (int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
       {
@@ -164,62 +166,58 @@ static void ei_cache_friendly_product(
       // acumulate bw rows of lhs time a single column of rhs to a bw x 1 block of res
       int l2blockSizeEnd = std::min(l2k+l2BlockSize, size);

+      // if not aligned, copy the rhs block
+      if (needRhsCopy)
+        for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
+        {
+          ei_internal_assert(l2BlockSizeAligned*(l1j-l2j)+(l2blockSizeEnd-l2k) < l2BlockSizeAligned*l2BlockSizeAligned);
+          memcpy(rhsCopy+l2BlockSizeAligned*(l1j-l2j),&(rhs[l1j*rhsStride+l2k]),(l2blockSizeEnd-l2k)*sizeof(Scalar));
+        }

       // for each bw x 1 result's block
       for(int l1i=l2i; l1i<l2blockRowEndBW; l1i+=MaxBlockRows)
       {
+        int offsetblock = l2k * (l2blockRowEnd-l2i) + (l1i-l2i)*(l2blockSizeEnd-l2k) - l2k*MaxBlockRows;
+        const Scalar* __restrict__ localB = &block[offsetblock];

         for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
         {
-          int offsetblock = l2k * (l2blockRowEnd-l2i) + (l1i-l2i)*(l2blockSizeEnd-l2k) - l2k*MaxBlockRows;
-          const Scalar* __restrict__ localB = &block[offsetblock];
-          const Scalar* __restrict__ rhsColumn = &(rhs[l1j*rhsStride]);
-          // copy unaligned rhs data
-          // YES it seems to be faster to copy some part of rhs multiple times
-          // to aligned memory rather than using unligned load.
-          // Moreover this avoids a "if" in the most nested loop :)
-          if (PacketSize>1 && size_t(rhsColumn)%16)
-          {
-            int count = 0;
-            // FIXME this loop get vectorized by the compiler (ICC)
-            // I'm not sure thats good or not
-            for (int k = l2k; k<l2blockSizeEnd; ++k)
-            {
-              rhsCopy[count++] = rhsColumn[k];
-            }
-            rhsColumn = &(rhsCopy[-l2k]);
-          }
+          const Scalar* __restrict__ rhsColumn;
+          if (needRhsCopy)
+            rhsColumn = &(rhsCopy[l2BlockSizeAligned*(l1j-l2j)-l2k]);
+          else
+            rhsColumn = &(rhs[l1j*rhsStride]);

           PacketType dst[MaxBlockRows];
-          dst[0] = ei_pset1(Scalar(0.));
-          dst[1] = dst[0];
-          dst[2] = dst[0];
-          dst[3] = dst[0];
+          dst[3] = dst[2] = dst[1] = dst[0] = ei_pset1(Scalar(0.));
           if (MaxBlockRows==8)
-          {
-            dst[4] = dst[0];
-            dst[5] = dst[0];
-            dst[6] = dst[0];
-            dst[7] = dst[0];
-          }
+            dst[7] = dst[6] = dst[5] = dst[4] = dst[0];

           PacketType tmp;

           asm("#eigen begincore");
           for(int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
           {
-            tmp = ei_pload(&rhsColumn[k]);
-            dst[0] = ei_pmadd(tmp, ei_pload(&(localB[k*MaxBlockRows])), dst[0]);
-            dst[1] = ei_pmadd(tmp, ei_pload(&(localB[k*MaxBlockRows+  PacketSize])), dst[1]);
-            dst[2] = ei_pmadd(tmp, ei_pload(&(localB[k*MaxBlockRows+2*PacketSize])), dst[2]);
-            dst[3] = ei_pmadd(tmp, ei_pload(&(localB[k*MaxBlockRows+3*PacketSize])), dst[3]);
+            tmp = ei_ploadu(&rhsColumn[k]);
+            PacketType A0, A1, A2, A3, A4, A5;
+            A0 = ei_pload(localB + k*MaxBlockRows);
+            A1 = ei_pload(localB + k*MaxBlockRows+1*PacketSize);
+            A2 = ei_pload(localB + k*MaxBlockRows+2*PacketSize);
+            A3 = ei_pload(localB + k*MaxBlockRows+3*PacketSize);
+            if (MaxBlockRows==8) A4 = ei_pload(localB + k*MaxBlockRows+4*PacketSize);
+            if (MaxBlockRows==8) A5 = ei_pload(localB + k*MaxBlockRows+5*PacketSize);
+            dst[0] = ei_pmadd(tmp, A0, dst[0]);
+            if (MaxBlockRows==8) A0 = ei_pload(localB + k*MaxBlockRows+6*PacketSize);
+            dst[1] = ei_pmadd(tmp, A1, dst[1]);
+            if (MaxBlockRows==8) A1 = ei_pload(localB + k*MaxBlockRows+7*PacketSize);
+            dst[2] = ei_pmadd(tmp, A2, dst[2]);
+            dst[3] = ei_pmadd(tmp, A3, dst[3]);
             if (MaxBlockRows==8)
             {
-              dst[4] = ei_pmadd(tmp, ei_pload(&(localB[k*MaxBlockRows+4*PacketSize])), dst[4]);
-              dst[5] = ei_pmadd(tmp, ei_pload(&(localB[k*MaxBlockRows+5*PacketSize])), dst[5]);
-              dst[6] = ei_pmadd(tmp, ei_pload(&(localB[k*MaxBlockRows+6*PacketSize])), dst[6]);
-              dst[7] = ei_pmadd(tmp, ei_pload(&(localB[k*MaxBlockRows+7*PacketSize])), dst[7]);
+              dst[4] = ei_pmadd(tmp, A4, dst[4]);
+              dst[5] = ei_pmadd(tmp, A5, dst[5]);
+              dst[6] = ei_pmadd(tmp, A0, dst[6]);
+              dst[7] = ei_pmadd(tmp, A1, dst[7]);
             }
           }
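The restructured kernel above separates the packet loads (A0..A5) from the multiply-accumulates so each load can complete while earlier madds execute, and it recycles A0/A1 for rows 6 and 7 once their first values have been consumed. Reduced to plain scalars, the scheduling idea looks like this (a sketch of the structure only, not the actual packet kernel):

    // Scalar stand-in for the pipelined kernel: issue all loads for one k
    // first, then the multiply-adds, mirroring the A0..A3 preloads above.
    void pipelined_kernel(const float* localB, const float* rhsColumn,
                          int l2k, int l2blockSizeEnd, int maxBlockRows,
                          float dst[4])
    {
      for (int k = l2k; k < l2blockSizeEnd; ++k)
      {
        const float tmp = rhsColumn[k];
        const float a0 = localB[k*maxBlockRows + 0]; // loads first...
        const float a1 = localB[k*maxBlockRows + 1];
        const float a2 = localB[k*maxBlockRows + 2];
        const float a3 = localB[k*maxBlockRows + 3];
        dst[0] += tmp * a0; // ...then arithmetic, so loads and madds overlap
        dst[1] += tmp * a1;
        dst[2] += tmp * a2;
        dst[3] += tmp * a3;
      }
    }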
@@ -227,7 +225,8 @@ static void ei_cache_friendly_product(

           if (PacketSize>1 && resIsAligned)
           {
-            ei_pstore(&(localRes[0]), ei_padd(ei_pload(&(localRes[0])), ei_preduxp(dst)));
+            // the result is aligned: let's do packet reduction
+            ei_pstore(&(localRes[0]), ei_padd(ei_pload(&(localRes[0])), ei_preduxp(&dst[0])));
             if (PacketSize==2)
               ei_pstore(&(localRes[2]), ei_padd(ei_pload(&(localRes[2])), ei_preduxp(&(dst[2]))));
             if (MaxBlockRows==8)
@@ -239,6 +238,7 @@ static void ei_cache_friendly_product(
           }
           else
           {
+            // not aligned => per coeff packet reduction
             localRes[0] += ei_predux(dst[0]);
             localRes[1] += ei_predux(dst[1]);
             localRes[2] += ei_predux(dst[2]);
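ei_predux reduces one packet to the scalar sum of its lanes, while ei_preduxp reduces four packets at once and returns the four sums packed into a single packet, which is why the aligned branch needs only one packet store. What such a horizontal reduction boils down to for one SSE float packet, as an illustrative sketch with raw intrinsics (not Eigen's actual implementation):

    #include <xmmintrin.h>

    // Horizontal sum of the 4 float lanes of an SSE register
    // (the ei_predux idea, spelled out with SSE1 intrinsics).
    inline float horizontal_sum(__m128 v)
    {
      __m128 hi = _mm_movehl_ps(v, v);        // lanes {2,3} into {0,1}
      __m128 s  = _mm_add_ps(v, hi);          // {0+2, 1+3, ...}
      __m128 s1 = _mm_shuffle_ps(s, s, 0x1);  // lane 1 into lane 0
      return _mm_cvtss_f32(_mm_add_ss(s, s1)); // (0+2)+(1+3)
    }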
@@ -262,32 +262,16 @@ static void ei_cache_friendly_product(
       asm("#eigen begin dynkernel");
       for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
       {
-        const Scalar* __restrict__ rhsColumn = &(rhs[l1j*rhsStride]);
-        // copy unaligned rhs data
-        if (PacketSize>1 && size_t(rhsColumn)%16)
-        {
-          int count = 0;
-          // FIXME this loop get vectorized by the compiler !
-          for (int k = l2k; k<l2blockSizeEnd; ++k)
-          {
-            rhsCopy[count++] = rhsColumn[k];
-          }
-          rhsColumn = &(rhsCopy[-l2k]);
-        }
+        const Scalar* __restrict__ rhsColumn;
+        if (needRhsCopy)
+          rhsColumn = &(rhsCopy[l2BlockSizeAligned*(l1j-l2j)-l2k]);
+        else
+          rhsColumn = &(rhs[l1j*rhsStride]);

         PacketType dst[MaxBlockRows];
-        dst[0] = ei_pset1(Scalar(0.));
-        dst[1] = dst[0];
-        dst[2] = dst[0];
-        dst[3] = dst[0];
-        if (MaxBlockRows>4)
-        {
-          dst[4] = dst[0];
-          dst[5] = dst[0];
-          dst[6] = dst[0];
-          dst[7] = dst[0];
-        }
+        dst[3] = dst[2] = dst[1] = dst[0] = ei_pset1(Scalar(0.));
+        if (MaxBlockRows==8)
+          dst[7] = dst[6] = dst[5] = dst[4] = dst[0];

         // let's declare a few other temporary registers
         PacketType tmp;
@@ -300,7 +284,7 @@ static void ei_cache_friendly_product(
           if (l2blockRemainingRows>=2) dst[1] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+  PacketSize])), dst[1]);
           if (l2blockRemainingRows>=3) dst[2] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+2*PacketSize])), dst[2]);
           if (l2blockRemainingRows>=4) dst[3] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+3*PacketSize])), dst[3]);
-          if (MaxBlockRows>4)
+          if (MaxBlockRows==8)
           {
             if (l2blockRemainingRows>=5) dst[4] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+4*PacketSize])), dst[4]);
             if (l2blockRemainingRows>=6) dst[5] = ei_pmadd(tmp, ei_pload(&(localB[k*l2blockRemainingRows+5*PacketSize])), dst[5]);
@@ -316,7 +300,7 @@ static void ei_cache_friendly_product(
         if (l2blockRemainingRows>=2) localRes[1] += ei_predux(dst[1]);
         if (l2blockRemainingRows>=3) localRes[2] += ei_predux(dst[2]);
         if (l2blockRemainingRows>=4) localRes[3] += ei_predux(dst[3]);
-        if (MaxBlockRows>4)
+        if (MaxBlockRows==8)
         {
           if (l2blockRemainingRows>=5) localRes[4] += ei_predux(dst[4]);
           if (l2blockRemainingRows>=6) localRes[5] += ei_predux(dst[5]);
@@ -573,16 +557,16 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(

   enum { AllAligned, EvenAligned, FirstAligned, NoneAligned };
   const int rowsAtOnce = 4;
-  // const int peels = 2;
+  const int peels = 2;
   const int PacketAlignedMask = PacketSize-1;
-  // const int PeelAlignedMask = PacketSize*peels-1;
+  const int PeelAlignedMask = PacketSize*peels-1;
   const int size = rhsSize;

   // How many coeffs of the result do we have to skip to be aligned.
   // Here we assume data are at least aligned on the base scalar type that is mandatory anyway.
   const int alignedStart = ei_alignmentOffset(rhs, size);
   const int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
-  //const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : 0;
+  const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;

   const int alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0;
   int alignmentPattern = alignmentStep==0 ? AllAligned
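Un-commenting peels and PeelAlignedMask turns on loop peeling for this row-major matrix-vector kernel; peeledSize marks how far the two-packets-per-iteration loop may run, the remainder being handled packet by packet. ei_alignmentOffset itself computes how many leading scalars to skip before the pointer hits a 16-byte boundary; a hypothetical equivalent, assuming (as the comment above states) that data is at least aligned on the scalar type:

    #include <cstdint>

    // Sketch: number of Scalars to skip so that ptr+offset is 16-byte
    // aligned, capped by maxOffset (an ei_alignmentOffset-like helper).
    template <typename Scalar>
    int alignment_offset(const Scalar* ptr, int maxOffset)
    {
      const std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(ptr);
      const int packetSize = int(16 / sizeof(Scalar)); // Scalars per 16 bytes
      const int offset =
          int((packetSize - (addr / sizeof(Scalar)) % packetSize) % packetSize);
      return offset < maxOffset ? offset : maxOffset;
    }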
@@ -650,7 +634,32 @@ EIGEN_DONT_INLINE static void ei_cache_friendly_product_rowmajor_times_vector(
        _EIGEN_ACCUMULATE_PACKETS(,u,,);
      break;
    case FirstAligned:
-     for (int j = alignedStart; j<alignedSize; j+=PacketSize)
+     if (peels>1)
+     {
+       Packet A01, A02, A03, b;
+       for (int j = alignedStart; j<peeledSize; j+=peels*PacketSize)
+       {
+         b   = ei_pload(&rhs[j]);
+         A01 = ei_ploadu(&lhs1[j]);
+         A02 = ei_ploadu(&lhs2[j]);
+         A03 = ei_ploadu(&lhs3[j]);
+
+         ptmp0 = ei_pmadd(b, ei_pload (&lhs0[j]), ptmp0);
+         ptmp1 = ei_pmadd(b, A01, ptmp1);
+         A01 = ei_ploadu(&lhs1[j+PacketSize]);
+         ptmp2 = ei_pmadd(b, A02, ptmp2);
+         A02 = ei_ploadu(&lhs2[j+PacketSize]);
+         ptmp3 = ei_pmadd(b, A03, ptmp3);
+         A03 = ei_ploadu(&lhs3[j+PacketSize]);
+
+         b = ei_pload(&rhs[j+PacketSize]);
+         ptmp0 = ei_pmadd(b, ei_pload (&lhs0[j+PacketSize]), ptmp0);
+         ptmp1 = ei_pmadd(b, A01, ptmp1);
+         ptmp2 = ei_pmadd(b, A02, ptmp2);
+         ptmp3 = ei_pmadd(b, A03, ptmp3);
+       }
+     }
+     for (int j = peeledSize; j<alignedSize; j+=PacketSize)
        _EIGEN_ACCUMULATE_PACKETS(,u,u,);
      break;
    default:
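This is the peeled loop enabled by the constants above: each iteration consumes peels (= 2) packets per row, and the second round of A01..A03 loads is issued while the first round of multiply-adds is still in flight. The control-flow skeleton, reduced to scalars (a sketch of the structure only):

    // 2x peeling skeleton: the main loop advances by 2*packetSize "lanes",
    // the second loop mops up the remaining whole packets.
    void peeled_dot(const float* lhs0, const float* rhs,
                    int alignedStart, int alignedSize, int packetSize,
                    float& acc)
    {
      const int peels = 2;
      const int step = peels * packetSize;
      const int peeledSize =
          alignedStart + ((alignedSize - alignedStart) / step) * step;
      for (int j = alignedStart; j < peeledSize; j += step)
      {
        acc += rhs[j] * lhs0[j];                           // first "packet"
        acc += rhs[j + packetSize] * lhs0[j + packetSize]; // peeled second one
      }
      for (int j = peeledSize; j < alignedSize; j += packetSize)
        acc += rhs[j] * lhs0[j];
    }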
|
@ -49,9 +49,7 @@ struct ei_traits<Map<MatrixType, Alignment> >
|
|||||||
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
ColsAtCompileTime = MatrixType::ColsAtCompileTime,
|
||||||
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
|
||||||
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
|
||||||
Flags = MatrixType::Flags
|
Flags = MatrixType::Flags,
|
||||||
& ( (HereditaryBits | LinearAccessBit | DirectAccessBit)
|
|
||||||
| (Alignment == Aligned ? PacketAccessBit : 0) ),
|
|
||||||
CoeffReadCost = NumTraits<Scalar>::ReadCost
|
CoeffReadCost = NumTraits<Scalar>::ReadCost
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
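This is the "fix Flags of Map" item from the commit message: the traits of Map now forward the wrapped matrix type's Flags unchanged instead of masking them. As a reminder of what Map does, a minimal usage sketch (written in today's Eigen spelling, which postdates this commit):

    #include <Eigen/Dense>

    int main()
    {
      float data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      // View the raw buffer as a 2x4 matrix without copying; the Map's
      // Flags decide what kind of access to 'data' may be assumed.
      Eigen::Map<Eigen::Matrix<float, 2, 4> > m(data);
      m *= 2.0f; // writes through to 'data'
    }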
|
@ -192,12 +192,12 @@ inline __m128i ei_preduxp(const __m128i* vecs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if (defined __GNUC__)
|
#if (defined __GNUC__)
|
||||||
template <> inline __m128 ei_pmadd(const __m128& a, const __m128& b, const __m128& c)
|
// template <> inline __m128 ei_pmadd(const __m128& a, const __m128& b, const __m128& c)
|
||||||
{
|
// {
|
||||||
__m128 res = b;
|
// __m128 res = b;
|
||||||
asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
|
// asm("mulps %[a], %[b] \n\taddps %[c], %[b]" : [b] "+x" (res) : [a] "x" (a), [c] "x" (c));
|
||||||
return res;
|
// return res;
|
||||||
}
|
// }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // EIGEN_PACKET_MATH_SSE_H
|
#endif // EIGEN_PACKET_MATH_SSE_H
|
||||||
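With the GCC inline-asm specialization commented out, ei_pmadd falls back to the generic multiply-then-add form; SSE (before FMA hardware) has no single fused instruction anyway, so the fallback is simply, as a sketch:

    #include <xmmintrin.h>

    // Generic SSE "pmadd": a lane-wise multiply followed by an add.
    // The disabled asm above emitted the same two instructions while
    // forcing the result to stay in one register.
    inline __m128 pmadd_sse(__m128 a, __m128 b, __m128 c)
    {
      return _mm_add_ps(_mm_mul_ps(a, b), c); // a*b + c per lane
    }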
|
@ -34,7 +34,7 @@
|
|||||||
* \param MatrixType the type of the matrix of which we are performing the tridiagonalization
|
* \param MatrixType the type of the matrix of which we are performing the tridiagonalization
|
||||||
*
|
*
|
||||||
* This class performs a tridiagonal decomposition of a selfadjoint matrix \f$ A \f$ such that:
|
* This class performs a tridiagonal decomposition of a selfadjoint matrix \f$ A \f$ such that:
|
||||||
* \f$ A = Q T Q^* \f$ where \f$ Q \f$ is unitatry and \f$ T \f$ a real symmetric tridiagonal matrix
|
* \f$ A = Q T Q^* \f$ where \f$ Q \f$ is unitary and \f$ T \f$ a real symmetric tridiagonal matrix.
|
||||||
*
|
*
|
||||||
* \sa MatrixBase::tridiagonalize()
|
* \sa MatrixBase::tridiagonalize()
|
||||||
*/
|
*/
|
||||||
@@ -45,12 +45,15 @@ template<typename _MatrixType> class Tridiagonalization
     typedef _MatrixType MatrixType;
     typedef typename MatrixType::Scalar Scalar;
     typedef typename NumTraits<Scalar>::Real RealScalar;
+    typedef typename ei_packet_traits<Scalar>::type Packet;
+
     enum {
       Size = MatrixType::RowsAtCompileTime,
       SizeMinusOne = MatrixType::RowsAtCompileTime==Dynamic
                    ? Dynamic
-                   : MatrixType::RowsAtCompileTime-1};
+                   : MatrixType::RowsAtCompileTime-1,
+      PacketSize = ei_packet_traits<Scalar>::size
+    };

     typedef Matrix<Scalar, SizeMinusOne, 1> CoeffVectorType;
     typedef Matrix<RealScalar, Size, 1> DiagonalType;
@@ -59,8 +62,7 @@ template<typename _MatrixType> class Tridiagonalization
     typedef typename NestByValue<DiagonalCoeffs<MatrixType> >::RealReturnType DiagonalReturnType;

     typedef typename NestByValue<DiagonalCoeffs<
-        NestByValue<Block<
-        MatrixType,SizeMinusOne,SizeMinusOne> > > >::RealReturnType SubDiagonalReturnType;
+        NestByValue<Block<MatrixType,SizeMinusOne,SizeMinusOne> > > >::RealReturnType SubDiagonalReturnType;

     /** This constructor initializes a Tridiagonalization object for
       * further use with Tridiagonalization::compute()
@@ -103,7 +105,7 @@ template<typename _MatrixType> class Tridiagonalization
   * Householder coefficients returned by householderCoefficients(),
   * allows to reconstruct the matrix Q as follow:
   *       Q = H_{N-1} ... H_1 H_0
- * where the matrices H are the Householder transformation:
+ * where the matrices H are the Householder transformations:
   *       H_i = (I - h_i * v_i * v_i')
   * where h_i == householderCoefficients()[i] and v_i is a Householder vector:
   *       v_i = [ 0, ..., 0, 1, M(i+2,i), ..., M(N-1,i) ]
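Restating the documented relations as display math (directly from the comment above):

    Q = H_{N-1} \cdots H_1 H_0, \qquad
    H_i = I - h_i \, v_i v_i^{*}, \qquad
    v_i = (0, \dots, 0, 1, M_{i+2,i}, \dots, M_{N-1,i})^{T}

Each H_i differs from the identity only in rows and columns i+1..N-1, so applying the reflectors in this order to the identity rebuilds Q without disturbing the columns already formed.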
@@ -157,8 +159,8 @@ template<typename MatrixType>
 typename Tridiagonalization<MatrixType>::MatrixType
 Tridiagonalization<MatrixType>::matrixT(void) const
 {
-  // FIXME should this function (and other similar) rather take a matrix as argument
-  // and fill it (avoids temporaries)
+  // FIXME should this function (and other similar ones) rather take a matrix as argument
+  // and fill it ? (to avoid temporaries)
   int n = m_matrix.rows();
   MatrixType matT = m_matrix;
   matT.corner(TopRight,n-1, n-1).diagonal() = subDiagonal().conjugate();
@@ -189,6 +191,7 @@ void Tridiagonalization<MatrixType>::_compute(MatrixType& matA, CoeffVectorType&
 {
   assert(matA.rows()==matA.cols());
   int n = matA.rows();
+//   std::cerr << matA << "\n\n";
   for (int i = 0; i<n-2; ++i)
   {
     // let's consider the vector v = i-th column starting at position i+1
@@ -216,22 +219,100 @@ void Tridiagonalization<MatrixType>::_compute(MatrixType& matA, CoeffVectorType&
     // i.e., A = H' A H where H = I - h v v' and v = matA.col(i).end(n-i-1)

     matA.col(i).coeffRef(i+1) = 1;
-    // let's use the end of hCoeffs to store temporary values
-    hCoeffs.end(n-i-1) = h * (matA.corner(BottomRight,n-i-1,n-i-1).template part<Lower|SelfAdjoint>()
-                              * matA.col(i).end(n-i-1));

+    /* This is the initial algorithm which minimize operation counts and maximize
+     * the use of Eigen's expression. Unfortunately, the first matrix-vector product
+     * using Part<Lower|Selfadjoint> is very very slow */
+    #ifdef EIGEN_NEVER_DEFINED
+    // matrix - vector product
+    hCoeffs.end(n-i-1) = (matA.corner(BottomRight,n-i-1,n-i-1).template part<Lower|SelfAdjoint>()
+                          * (h * matA.col(i).end(n-i-1))).lazy();
+    // simple axpy
+    hCoeffs.end(n-i-1) += (h * Scalar(-0.5) * matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1)))
+                          * matA.col(i).end(n-i-1);
+    // rank-2 update
+    //Block<MatrixType,Dynamic,1> B(matA,i+1,i,n-i-1,1);
+    matA.corner(BottomRight,n-i-1,n-i-1).template part<Lower>() -=
+      (matA.col(i).end(n-i-1) * hCoeffs.end(n-i-1).adjoint()).lazy()
+      + (hCoeffs.end(n-i-1) * matA.col(i).end(n-i-1).adjoint()).lazy();
+    #endif
+    /* end initial algorithm */
+
+    /* If we still want to minimize operation count (i.e., perform operation on the lower part only)
+     * then we could provide the following algorithm for selfadjoint - vector product. However, a full
+     * matrix-vector product is still faster (at least for dynamic size, and not too small, did not check
+     * small matrices). The algo performs block matrix-vector and transposed matrix vector products. */
+    #ifdef EIGEN_NEVER_DEFINED
+    int n4 = (std::max(0,n-4)/4)*4;
+    hCoeffs.end(n-i-1).setZero();
+    for (int b=i+1; b<n4; b+=4)
+    {
+      // the ?x4 part:
+      hCoeffs.end(b-4) +=
+        Block<MatrixType,Dynamic,4>(matA,b+4,b,n-b-4,4) * matA.template block<4,1>(b,i);
+      // the respective transposed part:
+      Block<CoeffVectorType,4,1>(hCoeffs, b, 0, 4,1) +=
+        Block<MatrixType,Dynamic,4>(matA,b+4,b,n-b-4,4).adjoint() * Block<MatrixType,Dynamic,1>(matA,b+4,i,n-b-4,1);
+      // the 4x4 block diagonal:
+      Block<CoeffVectorType,4,1>(hCoeffs, b, 0, 4,1) +=
+        (Block<MatrixType,4,4>(matA,b,b,4,4).template part<Lower|SelfAdjoint>()
+         * (h * Block<MatrixType,4,1>(matA,b,i,4,1))).lazy();
+    }
+    #endif
+    // todo: handle the remaining part
+    /* end optimized selfadjoint - vector product */
+
+    /* Another interesting note: the above rank-2 update is much slower than the following hand written loop.
+     * After an analyse of the ASM, it seems GCC (4.2) generate poor code because of the Block. Moreover,
+     * if we remove the specialization of Block for Matrix then it is even worse, much worse ! */
+    #ifdef EIGEN_NEVER_DEFINED
+    for (int j1=i+1; j1<n; ++j1)
+    for (int i1=j1;  i1<n; i1++)
+      matA.coeffRef(i1,j1) -= matA.coeff(i1,i)*ei_conj(hCoeffs.coeff(j1-1))
+                            + hCoeffs.coeff(i1-1)*ei_conj(matA.coeff(j1,i));
+    #endif
+    /* end hand writen partial rank-2 update */
+
+    /* The current fastest implementation: the full matrix is used, no "optimization" to use/compute
+     * only half of the matrix. Custom vectorization of the inner col -= alpha X + beta Y such that access
+     * to col are always aligned. Once we support that in Assign, then the algorithm could be rewriten as
+     * a single compact expression. This code is therefore a good benchmark when will do that. */
+
+    // let's use the end of hCoeffs to store temporary values:
+    hCoeffs.end(n-i-1) = (matA.corner(BottomRight,n-i-1,n-i-1) * (h * matA.col(i).end(n-i-1))).lazy();
+    // FIXME in the above expr a temporary is created because of the scalar multiple by h
+
     hCoeffs.end(n-i-1) += (h * Scalar(-0.5) * matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1)))
                           * matA.col(i).end(n-i-1);

-    matA.corner(BottomRight,n-i-1,n-i-1).template part<Lower>() =
-      matA.corner(BottomRight,n-i-1,n-i-1) - (
-        (matA.col(i).end(n-i-1) * hCoeffs.end(n-i-1).adjoint()).lazy()
-        + (hCoeffs.end(n-i-1) * matA.col(i).end(n-i-1).adjoint()).lazy() );
-    // FIXME check that the above expression does follow the lazy path (no temporary and
-    // only lower products are evaluated)
-    // FIXME can we avoid to evaluate twice the diagonal products ?
-    // (in a simple way otherwise it's overkill)
+    const Scalar* __restrict__ pb = &matA.coeffRef(0,i);
+    const Scalar* __restrict__ pa = (&hCoeffs.coeffRef(0)) - 1;
+    for (int j1=i+1; j1<n; ++j1)
+    {
+      int starti = i+1;
+      int alignedEnd = starti;
+      if (PacketSize>1)
+      {
+        int alignedStart = (starti) + ei_alignmentOffset(&matA.coeffRef(starti,j1), n-starti);
+        alignedEnd = alignedStart + ((n-alignedStart)/PacketSize)*PacketSize;

+        for (int i1=starti; i1<alignedStart; ++i1)
+          matA.coeffRef(i1,j1) -= matA.coeff(i1,i)*ei_conj(hCoeffs.coeff(j1-1))
+                                + hCoeffs.coeff(i1-1)*ei_conj(matA.coeff(j1,i));

+        Packet tmp0 = ei_pset1(hCoeffs.coeff(j1-1));
+        Packet tmp1 = ei_pset1(matA.coeff(j1,i));
+        Scalar* pc = &matA.coeffRef(0,j1);
+        for (int i1=alignedStart ; i1<alignedEnd; i1+=PacketSize)
+          ei_pstore(pc+i1,ei_psub(ei_pload(pc+i1),
+                                  ei_padd(ei_pmul(tmp0, ei_ploadu(pb+i1)),
+                                          ei_pmul(tmp1, ei_ploadu(pa+i1)))));
+      }
+      for (int i1=alignedEnd; i1<n; ++i1)
+        matA.coeffRef(i1,j1) -= matA.coeff(i1,i)*ei_conj(hCoeffs.coeff(j1-1))
+                              + hCoeffs.coeff(i1-1)*ei_conj(matA.coeff(j1,i));
+    }
+    /* end optimized implemenation */

     // note: at that point matA(i+1,i+1) is the (i+1)-th element of the final diagonal
     // note: the sequence of the beta values leads to the subdiagonal entries
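All the variants kept in this hunk implement the same Householder rank-2 update; with v = matA.col(i).end(n-i-1) and h the Householder coefficient, the expressions above compute (reading them directly):

    p = A\,(h\,v), \qquad
    w = p - \tfrac{h}{2}\,(v^{*} p)\,v, \qquad
    A \leftarrow A - v\,w^{*} - w\,v^{*}

where w is stored in the tail of hCoeffs. The retained hand-vectorized loop evaluates the last line column by column as col_j -= alpha*X + beta*Y, with a scalar prologue up to the first aligned coefficient, an aligned packet body, and a scalar epilogue.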
@@ -286,7 +367,6 @@ void Tridiagonalization<MatrixType>::decomposeInPlace(MatrixType& mat, DiagonalT
   ei_assert(mat.cols()==n && diag.size()==n && subdiag.size()==n-1);
   if (n==3 && (!NumTraits<Scalar>::IsComplex) )
   {
-    Tridiagonalization tridiag(mat);
     _decomposeInPlace3x3(mat, diag, subdiag, extractQ);
   }
   else
@@ -301,7 +381,7 @@ void Tridiagonalization<MatrixType>::decomposeInPlace(MatrixType& mat, DiagonalT

 /** \internal
   * Optimized path for 3x3 matrices.
- * Especially usefull for plane fit.
+ * Especially useful for plane fitting.
   */
 template<typename MatrixType>
 void Tridiagonalization<MatrixType>::_decomposeInPlace3x3(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
@@ -38,7 +38,12 @@ MACRO(EI_ADD_TEST testname)

   SET(targetname test_${testname})

-  ADD_EXECUTABLE(${targetname} ${testname}.cpp)
+  IF(${ARGC} EQUAL 2)
+    SET(filename ${ARGV1})
+  ELSE(${ARGC} EQUAL 2)
+    SET(filename ${testname}.cpp)
+  ENDIF(${ARGC} EQUAL 2)
+  ADD_EXECUTABLE(${targetname} ${filename})

   IF(NOT EIGEN_NO_ASSERTION_CHECKING)

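The EI_ADD_TEST macro now accepts an optional second argument naming the source file, so several test targets can share one file if needed. Usage as enabled by this change (the second call is hypothetical, for illustration only):

    EI_ADD_TEST(product_small)            # builds test_product_small from product_small.cpp
    EI_ADD_TEST(product_alt product.cpp)  # hypothetical two-argument form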
@@ -80,7 +85,8 @@ EI_ADD_TEST(nomalloc)
 EI_ADD_TEST(basicstuff)
 EI_ADD_TEST(linearstructure)
 EI_ADD_TEST(cwiseop)
-EI_ADD_TEST(product)
+EI_ADD_TEST(product_small)
+EI_ADD_TEST(product_large)
 EI_ADD_TEST(adjoint)
 EI_ADD_TEST(submatrices)
 EI_ADD_TEST(miscmatrices)
|
@ -63,8 +63,8 @@ void test_eigensolver()
|
|||||||
// very important to test a 3x3 matrix since we provide a special path for it
|
// very important to test a 3x3 matrix since we provide a special path for it
|
||||||
CALL_SUBTEST( eigensolver(Matrix3f()) );
|
CALL_SUBTEST( eigensolver(Matrix3f()) );
|
||||||
CALL_SUBTEST( eigensolver(Matrix4d()) );
|
CALL_SUBTEST( eigensolver(Matrix4d()) );
|
||||||
CALL_SUBTEST( eigensolver(MatrixXd(7,7)) );
|
CALL_SUBTEST( eigensolver(MatrixXf(7,7)) );
|
||||||
CALL_SUBTEST( eigensolver(MatrixXcd(6,6)) );
|
CALL_SUBTEST( eigensolver(MatrixXcd(6,6)) );
|
||||||
CALL_SUBTEST( eigensolver(MatrixXcd(3,3)) );
|
CALL_SUBTEST( eigensolver(MatrixXcf(3,3)) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -144,25 +144,3 @@ template<typename MatrixType> void product(const MatrixType& m)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_product()
|
|
||||||
{
|
|
||||||
for(int i = 0; i < g_repeat; i++) {
|
|
||||||
CALL_SUBTEST( product(Matrix3i()) );
|
|
||||||
CALL_SUBTEST( product(Matrix<float, 3, 2>()) );
|
|
||||||
CALL_SUBTEST( product(Matrix4d()) );
|
|
||||||
CALL_SUBTEST( product(Matrix4f()) );
|
|
||||||
CALL_SUBTEST( product(MatrixXf(3,5)) );
|
|
||||||
CALL_SUBTEST( product(MatrixXi(28,39)) );
|
|
||||||
}
|
|
||||||
for(int i = 0; i < g_repeat; i++) {
|
|
||||||
CALL_SUBTEST( product(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) );
|
|
||||||
CALL_SUBTEST( product(MatrixXd(ei_random<int>(1,320), ei_random<int>(1,320))) );
|
|
||||||
CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,256), ei_random<int>(1,256))) );
|
|
||||||
CALL_SUBTEST( product(MatrixXcf(ei_random<int>(1,50), ei_random<int>(1,50))) );
|
|
||||||
#ifndef EIGEN_DEFAULT_TO_ROW_MAJOR
|
|
||||||
CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,Dynamic,Dynamic,RowMajorBit>(ei_random<int>(1,320), ei_random<int>(1,320))) );
|
|
||||||
#else
|
|
||||||
CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,Dynamic,Dynamic,0>(ei_random<int>(1,320), ei_random<int>(1,320))) );
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
test/product_large.cpp (new file, 40 lines)
@@ -0,0 +1,40 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "product.h"
+
+void test_product_large()
+{
+  for(int i = 0; i < g_repeat; i++) {
+    CALL_SUBTEST( product(MatrixXf(ei_random<int>(1,320), ei_random<int>(1,320))) );
+    CALL_SUBTEST( product(MatrixXd(ei_random<int>(1,320), ei_random<int>(1,320))) );
+    CALL_SUBTEST( product(MatrixXi(ei_random<int>(1,320), ei_random<int>(1,320))) );
+    CALL_SUBTEST( product(MatrixXcf(ei_random<int>(1,50), ei_random<int>(1,50))) );
+#ifndef EIGEN_DEFAULT_TO_ROW_MAJOR
+    CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,Dynamic,Dynamic,RowMajorBit>(ei_random<int>(1,320), ei_random<int>(1,320))) );
+#else
+    CALL_SUBTEST( product(Matrix<float,Dynamic,Dynamic,Dynamic,Dynamic,0>(ei_random<int>(1,320), ei_random<int>(1,320))) );
+#endif
+  }
+}
test/product_small.cpp (new file, 35 lines)
@@ -0,0 +1,35 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob@math.jussieu.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include "product.h"
+
+void test_product_small()
+{
+  for(int i = 0; i < g_repeat; i++) {
+    CALL_SUBTEST( product(Matrix<float, 3, 2>()) );
+    CALL_SUBTEST( product(Matrix<int, 3, 5>()) );
+    CALL_SUBTEST( product(Matrix4d()) );
+    CALL_SUBTEST( product(Matrix4f()) );
+  }
+}