Fixed compilation error

This commit is contained in:
Benoit Steiner 2016-05-01 14:48:01 -07:00
parent d6c9596fd8
commit 8a9228ed9b

View File

@ -543,12 +543,12 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
#define prefetch_lhs(reg, row, col) \
if (!CHECK_LHS_BOUNDARY) { \
if (col < k_size) { \
reg =lhs.loadPacket(row, col); \
reg =lhs.loadPacket<Unaligned>(row, col); \
} \
} else { \
if (col < k_size) { \
if (row + 3 < m_size) { \
reg =lhs.loadPacket(row, col); \
reg =lhs.loadPacket<Unaligned>(row, col); \
} else if (row + 2 < m_size) { \
reg.x =lhs(row + 0, col); \
reg.y =lhs(row + 1, col); \
@ -578,7 +578,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
if (!CHECK_RHS_BOUNDARY) {
if ((rhs_vert + 3) < k_size) {
// just CHECK_RHS_BOUNDARY
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
} else if (rhs_vert + 2 < k_size) {
// just CHECK_RHS_BOUNDARY
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
@ -593,7 +593,7 @@ EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rh
} else {
if (rhs_horiz0 < n_size) {
if ((rhs_vert + 3) < k_size) {
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
} else if ((rhs_vert + 2) < k_size) {
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0);
@ -790,37 +790,37 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
if (!CHECK_LHS_BOUNDARY) {
if ((threadIdx.y/4+k+24) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16));
lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24));
lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
lhs_pf3 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+24));
} else if ((threadIdx.y/4+k+16) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16));
lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
} else if ((threadIdx.y/4+k+8) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
} else if ((threadIdx.y/4+k) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
}
} else {
// just CHECK_LHS_BOUNDARY
if (lhs_vert + 3 < m_size) {
if ((threadIdx.y/4+k+24) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16));
lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24));
lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
lhs_pf3 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+24));
} else if ((threadIdx.y/4+k+16) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16));
lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf2 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+16));
} else if ((threadIdx.y/4+k+8) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8));
lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
lhs_pf1 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k+8));
} else if ((threadIdx.y/4+k) < k_size) {
lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k));
lhs_pf0 =lhs.loadPacket<Unaligned>(lhs_vert, (threadIdx.y/4+k));
}
} else if (lhs_vert + 2 < m_size) {
if ((threadIdx.y/4+k+24) < k_size) {
@ -909,8 +909,8 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
if (!CHECK_RHS_BOUNDARY) {
if ((rhs_vert + 3) < k_size) {
// just CHECK_RHS_BOUNDARY
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1);
rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
rhs_pf1 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz1);
} else if (rhs_vert + 2 < k_size) {
// just CHECK_RHS_BOUNDARY
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
@ -932,8 +932,8 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
if (rhs_horiz1 < n_size) {
if ((rhs_vert + 3) < k_size) {
// just CHECK_RHS_BOUNDARY
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1);
rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
rhs_pf1 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz1);
} else if (rhs_vert + 2 < k_size) {
// just CHECK_RHS_BOUNDARY
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);
@ -954,7 +954,7 @@ EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
} else if (rhs_horiz0 < n_size) {
if ((rhs_vert + 3) < k_size) {
// just CHECK_RHS_BOUNDARY
rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0);
rhs_pf0 = rhs.loadPacket<Unaligned>(rhs_vert, rhs_horiz0);
} else if ((rhs_vert + 2) < k_size) {
// just CHECK_RHS_BOUNDARY
rhs_pf0.x = rhs(rhs_vert, rhs_horiz0);