Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-12 03:39:01 +08:00)
Misc improvements and optimizations
commit 1e911b276c
parent 4ed213f97b
@@ -510,7 +510,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
       Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
       typedef TensorEvalToOp<const KernelArgType> EvalTo;
       EvalTo evalToTmp(local, m_kernelArg);
-      internal::TensorExecutor<const EvalTo, Device>::run(evalToTmp, m_device);
+      const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
+      internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);

       m_kernel = local;
       m_local_kernel = true;
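
This hunk threads the kernel expression's vectorizability into the executor as an explicit template argument instead of letting TensorExecutor pick a default. A minimal sketch of the underlying pattern, with illustrative names (Executor, evalScalar, evalPacket are stand-ins, not Eigen's internals):

// Dispatch on a compile-time "vectorizable" flag: the primary template
// runs a scalar loop; the specialization for true takes the packet path.
#include <cstddef>

template <typename Expr, bool Vectorizable>
struct Executor {
  // Fallback: evaluate one coefficient at a time.
  static void run(const Expr& expr, std::size_t n) {
    for (std::size_t i = 0; i < n; ++i) expr.evalScalar(i);
  }
};

template <typename Expr>
struct Executor<Expr, true> {
  // Chosen when the flag is true: process a packet of coefficients per
  // step, then finish the remainder scalar by scalar.
  static void run(const Expr& expr, std::size_t n) {
    const std::size_t PacketSize = 4;  // stand-in for the real packet width
    std::size_t i = 0;
    for (; i + PacketSize <= n; i += PacketSize) expr.evalPacket(i);
    for (; i < n; ++i) expr.evalScalar(i);
  }
};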
@@ -815,7 +816,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
       Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
       typedef TensorEvalToOp<const KernelArgType> EvalTo;
       EvalTo evalToTmp(local, m_kernelArg);
-      internal::TensorExecutor<const EvalTo, GpuDevice>::run(evalToTmp, m_device);
+      const bool PacketAccess = internal::IsVectorizable<GpuDevice, KernelArgType>::value;
+      internal::TensorExecutor<const EvalTo, GpuDevice, PacketAccess>::run(evalToTmp, m_device);

       m_kernel = local;
       m_local_kernel = true;
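
The GPU specialization makes the same change with GpuDevice substituted for the generic Device parameter. A hedged sketch of why the device also appears as a template argument (again with illustrative names, not Eigen's actual executor):

// The executor template can be partially specialized on the device type,
// so the same expression goes through a host loop on a default device and
// a kernel-launch path on a GPU device. Both types here are stand-ins.
struct DefaultDevice {};
struct GpuDevice {};

template <typename Expr, typename Device, bool Vectorizable>
struct DeviceExecutor {
  static void run(const Expr& expr, const Device&) {
    // a host-side evaluation loop would go here
  }
};

template <typename Expr, bool Vectorizable>
struct DeviceExecutor<Expr, GpuDevice, Vectorizable> {
  static void run(const Expr& expr, const GpuDevice&) {
    // a device kernel launch would go here instead
  }
};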
@@ -113,9 +113,9 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>

   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
-    m_impl.evalSubExprsIfNeeded(NULL);
-    return true;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) {
+    eigen_assert(scalar == NULL);
+    return m_impl.evalSubExprsIfNeeded(m_buffer);
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
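
The point of this hunk is that TensorEvalTo now hands its own destination buffer to the child evaluator and propagates the child's answer, so a child that can materialize directly into the buffer can short-circuit the per-coefficient loop. A hedged sketch of that calling convention, with made-up types (ChildEvaluator, a raw float buffer) standing in for Eigen's evaluators:

// Protocol sketch: the callee either writes its result straight into dest
// and returns false (nothing left to do), or returns true to tell the
// caller to drive evaluation coefficient by coefficient.
#include <cassert>

struct ChildEvaluator {
  bool evalSubExprsIfNeeded(float* dest) {
    if (dest != nullptr) {
      // e.g. the child already holds materialized data: fill dest directly
      return false;  // caller can skip the per-coefficient loop
    }
    return true;  // caller must evaluate itself
  }
};

struct EvalToEvaluator {
  ChildEvaluator m_impl;
  float* m_buffer = nullptr;

  bool evalSubExprsIfNeeded(float* scalar) {
    assert(scalar == nullptr);                      // EvalTo owns its buffer
    return m_impl.evalSubExprsIfNeeded(m_buffer);   // forward, as in the diff
  }
};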
@@ -116,7 +116,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     }
     typedef TensorEvalToOp<const ArgType> EvalTo;
     EvalTo evalToTmp(m_buffer, m_op);
-    static const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value;
+    const bool PacketAccess = internal::IsVectorizable<Device, ArgType>::value;
     internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
     m_impl.cleanup();
     return true;
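
The last hunk only drops the static qualifier from PacketAccess. A const local initialized from a constant expression remains usable as a template argument, so behavior is unchanged; a plausible motivation (an assumption, not stated in the commit) is avoiding function-local static storage in code that is also compiled for devices. A tiny self-contained illustration:

// A non-static const bool initialized from a constant expression can still
// appear as a template argument. IsVectorizableLike is a made-up stand-in
// for internal::IsVectorizable.
template <typename T>
struct IsVectorizableLike { static const bool value = true; };

template <bool B>
struct RunIf { static void run() { /* ... */ } };

void demo() {
  const bool PacketAccess = IsVectorizableLike<float>::value;  // no `static`
  RunIf<PacketAccess>::run();  // still a valid constant template argument
}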