mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-22 12:37:35 +08:00
Merge with dfa991cbae98cde7db5aef5ff1bb4b3d51cc362b
This commit is contained in:
commit
e7457e419d
@ -157,7 +157,11 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable>
|
|||||||
template <typename Evaluator, typename Index>
|
template <typename Evaluator, typename Index>
|
||||||
__global__ void
|
__global__ void
|
||||||
__launch_bounds__(1024)
|
__launch_bounds__(1024)
|
||||||
EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) {
|
EigenMetaKernel_NonVectorizable(Evaluator memcopied_eval, Index size) {
|
||||||
|
// Cuda memcopies the kernel arguments. That's fine for POD, but for more
|
||||||
|
// complex types such as evaluators we should really conform to the C++
|
||||||
|
// standard and call a proper copy constructor.
|
||||||
|
Evaluator eval(memcopied_eval);
|
||||||
|
|
||||||
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const Index step_size = blockDim.x * gridDim.x;
|
const Index step_size = blockDim.x * gridDim.x;
|
||||||
@ -171,7 +175,11 @@ EigenMetaKernel_NonVectorizable(Evaluator eval, Index size) {
|
|||||||
template <typename Evaluator, typename Index>
|
template <typename Evaluator, typename Index>
|
||||||
__global__ void
|
__global__ void
|
||||||
__launch_bounds__(1024)
|
__launch_bounds__(1024)
|
||||||
EigenMetaKernel_Vectorizable(Evaluator eval, Index size) {
|
EigenMetaKernel_Vectorizable(Evaluator memcopied_eval, Index size) {
|
||||||
|
// Cuda memcopies the kernel arguments. That's fine for POD, but for more
|
||||||
|
// complex types such as evaluators we should really conform to the C++
|
||||||
|
// standard and call a proper copy constructor.
|
||||||
|
Evaluator eval(memcopied_eval);
|
||||||
|
|
||||||
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
const Index first_index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
const Index step_size = blockDim.x * gridDim.x;
|
const Index step_size = blockDim.x * gridDim.x;
|
||||||
|
@ -197,7 +197,7 @@ int get_random_seed() {
|
|||||||
#else
|
#else
|
||||||
timespec ts;
|
timespec ts;
|
||||||
clock_gettime(CLOCK_REALTIME, &ts);
|
clock_gettime(CLOCK_REALTIME, &ts);
|
||||||
return ts.tv_nsec;
|
return static_cast<int>(ts.tv_nsec);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -220,7 +220,7 @@ template <typename T> class UniformRandomGenerator {
|
|||||||
return random<T>();
|
return random<T>();
|
||||||
}
|
}
|
||||||
template<typename Index>
|
template<typename Index>
|
||||||
typename internal::packet_traits<T>::type packetOp(Index i, Index j = 0) const {
|
typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
|
||||||
const int packetSize = internal::packet_traits<T>::size;
|
const int packetSize = internal::packet_traits<T>::size;
|
||||||
EIGEN_ALIGN_DEFAULT T values[packetSize];
|
EIGEN_ALIGN_DEFAULT T values[packetSize];
|
||||||
for (int i = 0; i < packetSize; ++i) {
|
for (int i = 0; i < packetSize; ++i) {
|
||||||
@ -252,8 +252,8 @@ template <> class UniformRandomGenerator<float> {
|
|||||||
typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const {
|
typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const {
|
||||||
const int packetSize = internal::packet_traits<float>::size;
|
const int packetSize = internal::packet_traits<float>::size;
|
||||||
EIGEN_ALIGN_DEFAULT float values[packetSize];
|
EIGEN_ALIGN_DEFAULT float values[packetSize];
|
||||||
for (int i = 0; i < packetSize; ++i) {
|
for (int k = 0; k < packetSize; ++k) {
|
||||||
values[i] = this->operator()(i, j);
|
values[k] = this->operator()(i, j);
|
||||||
}
|
}
|
||||||
return internal::pload<typename internal::packet_traits<float>::type>(values);
|
return internal::pload<typename internal::packet_traits<float>::type>(values);
|
||||||
}
|
}
|
||||||
@ -285,8 +285,8 @@ template <> class UniformRandomGenerator<double> {
|
|||||||
typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const {
|
typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const {
|
||||||
const int packetSize = internal::packet_traits<double>::size;
|
const int packetSize = internal::packet_traits<double>::size;
|
||||||
EIGEN_ALIGN_DEFAULT double values[packetSize];
|
EIGEN_ALIGN_DEFAULT double values[packetSize];
|
||||||
for (int i = 0; i < packetSize; ++i) {
|
for (int k = 0; k < packetSize; ++k) {
|
||||||
values[i] = this->operator()(i, j);
|
values[k] = this->operator()(i, j);
|
||||||
}
|
}
|
||||||
return internal::pload<typename internal::packet_traits<double>::type>(values);
|
return internal::pload<typename internal::packet_traits<double>::type>(values);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user