mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-24 02:29:33 +08:00
Enable the use of the packet api to evaluate tensor broadcasts. This speed things up quite a bit:
Before" M_broadcasting/10 500000 3690 27.10 MFlops/s BM_broadcasting/80 500000 4014 1594.24 MFlops/s BM_broadcasting/640 100000 14770 27731.35 MFlops/s BM_broadcasting/4K 5000 632711 39512.48 MFlops/s After: BM_broadcasting/10 500000 4287 23.33 MFlops/s BM_broadcasting/80 500000 4455 1436.41 MFlops/s BM_broadcasting/640 200000 10195 40173.01 MFlops/s BM_broadcasting/4K 5000 423746 58997.57 MFlops/s
This commit is contained in:
parent
5fa27574dd
commit
e7e64c3277
@ -106,7 +106,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
|||||||
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = true,
|
||||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||||
RawAccess = false
|
RawAccess = false
|
||||||
|
Loading…
x
Reference in New Issue
Block a user