mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 12:46:00 +08:00
Adding EIGEN_STRONG_INLINE back; using size() instead of dimensions.TotalSize() on Tensor.
This commit is contained in:
parent
12387abad5
commit
3be3963021
@ -44,14 +44,14 @@ struct SyclDevice {
|
|||||||
// destructor
|
// destructor
|
||||||
~SyclDevice() { deallocate_all(); }
|
~SyclDevice() { deallocate_all(); }
|
||||||
|
|
||||||
template <typename T> void deallocate(T *p) const {
|
template <typename T> EIGEN_STRONG_INLINE void deallocate(T *p) const {
|
||||||
auto it = buffer_map.find(p);
|
auto it = buffer_map.find(p);
|
||||||
if (it != buffer_map.end()) {
|
if (it != buffer_map.end()) {
|
||||||
buffer_map.erase(it);
|
buffer_map.erase(it);
|
||||||
internal::aligned_free(p);
|
internal::aligned_free(p);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void deallocate_all() const {
|
EIGEN_STRONG_INLINE void deallocate_all() const {
|
||||||
std::map<const void *, std::shared_ptr<void>>::iterator it=buffer_map.begin();
|
std::map<const void *, std::shared_ptr<void>>::iterator it=buffer_map.begin();
|
||||||
while (it!=buffer_map.end()) {
|
while (it!=buffer_map.end()) {
|
||||||
auto p=it->first;
|
auto p=it->first;
|
||||||
@ -88,23 +88,23 @@ struct SyclDevice {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// allocating memory on the cpu
|
/// allocating memory on the cpu
|
||||||
void *allocate(size_t) const {
|
EIGEN_STRONG_INLINE void *allocate(size_t) const {
|
||||||
return internal::aligned_malloc(8);
|
return internal::aligned_malloc(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
// some runtime conditions that can be applied here
|
// some runtime conditions that can be applied here
|
||||||
bool isDeviceSuitable() const { return true; }
|
EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; }
|
||||||
|
|
||||||
void memcpy(void *dst, const void *src, size_t n) const {
|
EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const {
|
||||||
::memcpy(dst, src, n);
|
::memcpy(dst, src, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T> void memcpyHostToDevice(T *dst, const T *src, size_t n) const {
|
template<typename T> EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const {
|
||||||
auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>();
|
auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>();
|
||||||
memcpy(host_acc.get_pointer(), src, n);
|
memcpy(host_acc.get_pointer(), src, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void parallel_for_setup(size_t n, size_t &tileSize, size_t &rng, size_t &GRange) const {
|
EIGEN_STRONG_INLINE void parallel_for_setup(size_t n, size_t &tileSize, size_t &rng, size_t &GRange) const {
|
||||||
tileSize =m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
|
tileSize =m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
|
||||||
rng = n;
|
rng = n;
|
||||||
if (rng==0) rng=1;
|
if (rng==0) rng=1;
|
||||||
@ -116,7 +116,7 @@ struct SyclDevice {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T> void memcpyDeviceToHost(T *dst, const T *src, size_t n) const {
|
template<typename T> EIGEN_STRONG_INLINE void memcpyDeviceToHost(T *dst, const T *src, size_t n) const {
|
||||||
auto it = buffer_map.find(src);
|
auto it = buffer_map.find(src);
|
||||||
if (it != buffer_map.end()) {
|
if (it != buffer_map.end()) {
|
||||||
size_t rng, GRange, tileSize;
|
size_t rng, GRange, tileSize;
|
||||||
@ -141,7 +141,7 @@ struct SyclDevice {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T> void memset(T *buff, int c, size_t n) const {
|
template<typename T> EIGEN_STRONG_INLINE void memset(T *buff, int c, size_t n) const {
|
||||||
|
|
||||||
size_t rng, GRange, tileSize;
|
size_t rng, GRange, tileSize;
|
||||||
parallel_for_setup(n/sizeof(T), tileSize, rng, GRange);
|
parallel_for_setup(n/sizeof(T), tileSize, rng, GRange);
|
||||||
@ -158,7 +158,7 @@ struct SyclDevice {
|
|||||||
});
|
});
|
||||||
m_queue.throw_asynchronous();
|
m_queue.throw_asynchronous();
|
||||||
}
|
}
|
||||||
int majorDeviceVersion() const {
|
EIGEN_STRONG_INLINE int majorDeviceVersion() const {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -29,11 +29,11 @@ void test_device_sycl(const Eigen::SyclDevice &sycl_device) {
|
|||||||
array<int, 1> tensorRange = {{sizeDim1}};
|
array<int, 1> tensorRange = {{sizeDim1}};
|
||||||
Tensor<int, 1> in(tensorRange);
|
Tensor<int, 1> in(tensorRange);
|
||||||
Tensor<int, 1> in1(tensorRange);
|
Tensor<int, 1> in1(tensorRange);
|
||||||
memset(in1.data(), 1,in1.dimensions().TotalSize()*sizeof(int));
|
memset(in1.data(), 1,in1.size()*sizeof(int));
|
||||||
int * gpu_in_data = static_cast<int*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(int)));
|
int * gpu_in_data = static_cast<int*>(sycl_device.allocate(in.size()*sizeof(int)));
|
||||||
sycl_device.memset(gpu_in_data, 1,in.dimensions().TotalSize()*sizeof(int) );
|
sycl_device.memset(gpu_in_data, 1,in.size()*sizeof(int) );
|
||||||
sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.dimensions().TotalSize()*sizeof(int) );
|
sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(int) );
|
||||||
for (int i=0; i<in.dimensions().TotalSize(); i++)
|
for (int i=0; i<in.size(); i++)
|
||||||
VERIFY_IS_APPROX(in(i), in1(i));
|
VERIFY_IS_APPROX(in(i), in1(i));
|
||||||
sycl_device.deallocate(gpu_in_data);
|
sycl_device.deallocate(gpu_in_data);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user