Use computeProductBlockingSizes to compute blocking for both ShardByCol and ShardByRow cases.

This commit is contained in:
Rasmus Munk Larsen 2016-04-27 12:26:18 -07:00
parent 3dddd34133
commit 463738ccbe

View File

@@ -35,9 +35,7 @@ class TensorContractionBlocking {
computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, mc_, nc_, num_threads);
}
else {
if (kc_ && mc_ && nc_) {
mc_ = (((m / num_threads) + 15) / 16) * 16;
}
computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, nc_, mc_, num_threads);
}
}