mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-23 18:19:34 +08:00
Enable the use of the packet API to evaluate tensor broadcasts. This speeds things up quite a bit:
Before:
BM_broadcasting/10 500000 3690 27.10 MFlops/s
BM_broadcasting/80 500000 4014 1594.24 MFlops/s
BM_broadcasting/640 100000 14770 27731.35 MFlops/s
BM_broadcasting/4K 5000 632711 39512.48 MFlops/s
After:
BM_broadcasting/10 500000 4287 23.33 MFlops/s
BM_broadcasting/80 500000 4455 1436.41 MFlops/s
BM_broadcasting/640 200000 10195 40173.01 MFlops/s
BM_broadcasting/4K 5000 423746 58997.57 MFlops/s
This commit is contained in:
parent
5fa27574dd
commit
e7e64c3277
@ -106,7 +106,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
|
||||
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
|
||||
|
||||
enum {
|
||||
IsAligned = false,
|
||||
IsAligned = true,
|
||||
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
|
||||
Layout = TensorEvaluator<ArgType, Device>::Layout,
|
||||
RawAccess = false
|
||||
|
Loading…
x
Reference in New Issue
Block a user