mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Merged in benoitsteiner/opencl (pull request PR-318)
Improved support for OpenCL
This commit is contained in:
commit
b8e805497e
@ -437,10 +437,17 @@ endif()
|
|||||||
|
|
||||||
# add SYCL
|
# add SYCL
|
||||||
option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
|
option(EIGEN_TEST_SYCL "Add Sycl support." OFF)
|
||||||
|
option(EIGEN_SYCL_TRISYCL "Use the triSYCL Sycl implementation (ComputeCPP by default)." OFF)
|
||||||
if(EIGEN_TEST_SYCL)
|
if(EIGEN_TEST_SYCL)
|
||||||
set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
|
set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}")
|
||||||
include(FindComputeCpp)
|
if(EIGEN_SYCL_TRISYCL)
|
||||||
endif()
|
message(STATUS "Using triSYCL")
|
||||||
|
include(FindTriSYCL)
|
||||||
|
else(EIGEN_SYCL_TRISYCL)
|
||||||
|
message(STATUS "Using ComputeCPP SYCL")
|
||||||
|
include(FindComputeCpp)
|
||||||
|
endif(EIGEN_SYCL_TRISYCL)
|
||||||
|
endif(EIGEN_TEST_SYCL)
|
||||||
|
|
||||||
add_subdirectory(unsupported)
|
add_subdirectory(unsupported)
|
||||||
|
|
||||||
|
@ -111,7 +111,6 @@ endmacro(ei_add_test_internal)
|
|||||||
|
|
||||||
# SYCL
|
# SYCL
|
||||||
macro(ei_add_test_internal_sycl testname testname_with_suffix)
|
macro(ei_add_test_internal_sycl testname testname_with_suffix)
|
||||||
include_directories( SYSTEM ${COMPUTECPP_PACKAGE_ROOT_DIR}/include)
|
|
||||||
set(targetname ${testname_with_suffix})
|
set(targetname ${testname_with_suffix})
|
||||||
|
|
||||||
if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
|
if(EIGEN_ADD_TEST_FILENAME_EXTENSION)
|
||||||
@ -120,23 +119,31 @@ macro(ei_add_test_internal_sycl testname testname_with_suffix)
|
|||||||
set(filename ${testname}.cpp)
|
set(filename ${testname}.cpp)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set( include_file ${CMAKE_CURRENT_BINARY_DIR}/inc_${filename})
|
set( include_file "${CMAKE_CURRENT_BINARY_DIR}/inc_${filename}")
|
||||||
set( bc_file ${CMAKE_CURRENT_BINARY_DIR}/${filename})
|
set( bc_file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.sycl")
|
||||||
set( host_file ${CMAKE_CURRENT_SOURCE_DIR}/${filename})
|
set( host_file "${CMAKE_CURRENT_SOURCE_DIR}/${filename}")
|
||||||
|
|
||||||
ADD_CUSTOM_COMMAND(
|
if(NOT EIGEN_SYCL_TRISYCL)
|
||||||
OUTPUT ${include_file}
|
include_directories( SYSTEM ${COMPUTECPP_PACKAGE_ROOT_DIR}/include)
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${host_file}\\\"" > ${include_file}
|
|
||||||
COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${bc_file}.sycl\\\"" >> ${include_file}
|
|
||||||
DEPENDS ${filename} ${bc_file}.sycl
|
|
||||||
COMMENT "Building ComputeCpp integration header file ${include_file}"
|
|
||||||
)
|
|
||||||
# Add a custom target for the generated integration header
|
|
||||||
add_custom_target(${testname}_integration_header_sycl DEPENDS ${include_file})
|
|
||||||
|
|
||||||
add_executable(${targetname} ${include_file})
|
ADD_CUSTOM_COMMAND(
|
||||||
add_dependencies(${targetname} ${testname}_integration_header_sycl)
|
OUTPUT ${include_file}
|
||||||
add_sycl_to_target(${targetname} ${filename} ${CMAKE_CURRENT_BINARY_DIR})
|
COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${host_file}\\\"" > ${include_file}
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${bc_file}\\\"" >> ${include_file}
|
||||||
|
DEPENDS ${filename} ${bc_file}
|
||||||
|
COMMENT "Building ComputeCpp integration header file ${include_file}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add a custom target for the generated integration header
|
||||||
|
add_custom_target("${testname}_integration_header_sycl" DEPENDS ${include_file})
|
||||||
|
|
||||||
|
add_executable(${targetname} ${include_file})
|
||||||
|
add_dependencies(${targetname} "${testname}_integration_header_sycl")
|
||||||
|
else()
|
||||||
|
add_executable(${targetname} ${host_file})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_sycl_to_target(${targetname} ${CMAKE_CURRENT_BINARY_DIR} ${filename})
|
||||||
|
|
||||||
if (targetname MATCHES "^eigen2_")
|
if (targetname MATCHES "^eigen2_")
|
||||||
add_dependencies(eigen2_buildtests ${targetname})
|
add_dependencies(eigen2_buildtests ${targetname})
|
||||||
@ -467,7 +474,11 @@ macro(ei_testing_print_summary)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(EIGEN_TEST_SYCL)
|
if(EIGEN_TEST_SYCL)
|
||||||
message(STATUS "SYCL: ON")
|
if(EIGEN_SYCL_TRISYCL)
|
||||||
|
message(STATUS "SYCL: ON (using triSYCL)")
|
||||||
|
else()
|
||||||
|
message(STATUS "SYCL: ON (using computeCPP)")
|
||||||
|
endif()
|
||||||
else()
|
else()
|
||||||
message(STATUS "SYCL: OFF")
|
message(STATUS "SYCL: OFF")
|
||||||
endif()
|
endif()
|
||||||
|
@ -38,11 +38,6 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
|||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
|
||||||
message(FATAL_ERROR
|
message(FATAL_ERROR
|
||||||
"host compiler - Not found! (gcc version must be at least 4.8)")
|
"host compiler - Not found! (gcc version must be at least 4.8)")
|
||||||
# Require the GCC dual ABI to be disabled for 5.1 or higher
|
|
||||||
elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1)
|
|
||||||
set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True")
|
|
||||||
message(STATUS
|
|
||||||
"host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)")
|
|
||||||
else()
|
else()
|
||||||
message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}")
|
message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}")
|
||||||
endif()
|
endif()
|
||||||
@ -64,6 +59,12 @@ option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode"
|
|||||||
${COMPUTECPP_64_BIT_DEFAULT})
|
${COMPUTECPP_64_BIT_DEFAULT})
|
||||||
mark_as_advanced(COMPUTECPP_64_BIT_CODE)
|
mark_as_advanced(COMPUTECPP_64_BIT_CODE)
|
||||||
|
|
||||||
|
option(COMPUTECPP_DISABLE_GCC_DUAL_ABI "Compile with pre-5.1 ABI" OFF)
|
||||||
|
mark_as_advanced(COMPUTECPP_DISABLE_GCC_DUAL_ABI)
|
||||||
|
|
||||||
|
set(COMPUTECPP_USER_FLAGS "" CACHE STRING "User flags for compute++")
|
||||||
|
mark_as_advanced(COMPUTECPP_USER_FLAGS)
|
||||||
|
|
||||||
# Find OpenCL package
|
# Find OpenCL package
|
||||||
find_package(OpenCL REQUIRED)
|
find_package(OpenCL REQUIRED)
|
||||||
|
|
||||||
@ -74,7 +75,6 @@ if(NOT COMPUTECPP_PACKAGE_ROOT_DIR)
|
|||||||
else()
|
else()
|
||||||
message(STATUS "ComputeCpp package - Found")
|
message(STATUS "ComputeCpp package - Found")
|
||||||
endif()
|
endif()
|
||||||
option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package")
|
|
||||||
|
|
||||||
# Obtain the path to compute++
|
# Obtain the path to compute++
|
||||||
find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS
|
find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS
|
||||||
@ -138,8 +138,6 @@ else()
|
|||||||
message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}")
|
message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(COMPUTECPP_DEVICE_COMPILER_FLAGS ${COMPUTECPP_DEVICE_COMPILER_FLAGS} -sycl-compress-name -Wall -no-serial-memop -DEIGEN_NO_ASSERTION_CHECKING=1)
|
|
||||||
|
|
||||||
# Check if the platform is supported
|
# Check if the platform is supported
|
||||||
execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported"
|
execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported"
|
||||||
OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED
|
OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED
|
||||||
@ -155,6 +153,13 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(COMPUTECPP_USER_FLAGS
|
||||||
|
-sycl-compress-name
|
||||||
|
-Wall
|
||||||
|
-no-serial-memop
|
||||||
|
-DEIGEN_NO_ASSERTION_CHECKING=1
|
||||||
|
)
|
||||||
|
|
||||||
####################
|
####################
|
||||||
# __build_sycl
|
# __build_sycl
|
||||||
####################
|
####################
|
||||||
@ -165,8 +170,11 @@ endif()
|
|||||||
# targetName : Name of the target.
|
# targetName : Name of the target.
|
||||||
# sourceFile : Source file to be compiled.
|
# sourceFile : Source file to be compiled.
|
||||||
# binaryDir : Intermediate directory to output the integration header.
|
# binaryDir : Intermediate directory to output the integration header.
|
||||||
|
# fileCounter : Counter included in name of custom target. Different counter
|
||||||
|
# values prevent duplicated names of custom target when source files with the same name,
|
||||||
|
# but located in different directories, are used for the same target.
|
||||||
#
|
#
|
||||||
function(__build_spir targetName sourceFile binaryDir)
|
function(__build_spir targetName sourceFile binaryDir fileCounter)
|
||||||
|
|
||||||
# Retrieve source file name.
|
# Retrieve source file name.
|
||||||
get_filename_component(sourceFileName ${sourceFile} NAME)
|
get_filename_component(sourceFileName ${sourceFile} NAME)
|
||||||
@ -175,12 +183,16 @@ function(__build_spir targetName sourceFile binaryDir)
|
|||||||
set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl)
|
set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl)
|
||||||
|
|
||||||
# Add any user-defined include to the device compiler
|
# Add any user-defined include to the device compiler
|
||||||
|
set(device_compiler_includes "")
|
||||||
get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY
|
get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY
|
||||||
INCLUDE_DIRECTORIES)
|
INCLUDE_DIRECTORIES)
|
||||||
set(device_compiler_includes "")
|
|
||||||
foreach(directory ${includeDirectories})
|
foreach(directory ${includeDirectories})
|
||||||
set(device_compiler_includes "-I${directory}" ${device_compiler_includes})
|
set(device_compiler_includes "-I${directory}" ${device_compiler_includes})
|
||||||
endforeach()
|
endforeach()
|
||||||
|
get_target_property(targetIncludeDirectories ${targetName} INCLUDE_DIRECTORIES)
|
||||||
|
foreach(directory ${targetIncludeDirectories})
|
||||||
|
set(device_compiler_includes "-I${directory}" ${device_compiler_includes})
|
||||||
|
endforeach()
|
||||||
if (CMAKE_INCLUDE_PATH)
|
if (CMAKE_INCLUDE_PATH)
|
||||||
foreach(directory ${CMAKE_INCLUDE_PATH})
|
foreach(directory ${CMAKE_INCLUDE_PATH})
|
||||||
set(device_compiler_includes "-I${directory}"
|
set(device_compiler_includes "-I${directory}"
|
||||||
@ -188,6 +200,9 @@ function(__build_spir targetName sourceFile binaryDir)
|
|||||||
endforeach()
|
endforeach()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(COMPUTECPP_DEVICE_COMPILER_FLAGS
|
||||||
|
${COMPUTECPP_DEVICE_COMPILER_FLAGS}
|
||||||
|
${COMPUTECPP_USER_FLAGS})
|
||||||
# Convert argument list format
|
# Convert argument list format
|
||||||
separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS)
|
separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS)
|
||||||
|
|
||||||
@ -201,9 +216,10 @@ function(__build_spir targetName sourceFile binaryDir)
|
|||||||
${device_compiler_includes}
|
${device_compiler_includes}
|
||||||
-o ${outputSyclFile}
|
-o ${outputSyclFile}
|
||||||
-c ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
|
-c ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
|
||||||
DEPENDS ${sourceFile}
|
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}
|
||||||
|
IMPLICIT_DEPENDS CXX "${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile}"
|
||||||
WORKING_DIRECTORY ${binaryDir}
|
WORKING_DIRECTORY ${binaryDir}
|
||||||
COMMENT "Building ComputeCpp integration header file ${outputSyclFile}")
|
COMMENT "Building ComputeCpp integration header file ${outputSyclFile}")
|
||||||
|
|
||||||
# Add a custom target for the generated integration header
|
# Add a custom target for the generated integration header
|
||||||
add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile})
|
add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile})
|
||||||
@ -230,13 +246,18 @@ endfunction()
|
|||||||
# target and sets a dependency on that new command.
|
# target and sets a dependency on that new command.
|
||||||
#
|
#
|
||||||
# targetName : Name of the target to add a SYCL to.
|
# targetName : Name of the target to add a SYCL to.
|
||||||
# sourceFile : Source file to be compiled for SYCL.
|
|
||||||
# binaryDir : Intermediate directory to output the integration header.
|
# binaryDir : Intermediate directory to output the integration header.
|
||||||
|
# sourceFiles : Source files to be compiled for SYCL.
|
||||||
#
|
#
|
||||||
function(add_sycl_to_target targetName sourceFile binaryDir)
|
function(add_sycl_to_target targetName binaryDir sourceFiles)
|
||||||
|
|
||||||
|
set(sourceFiles ${sourceFiles} ${ARGN})
|
||||||
|
set(fileCounter 0)
|
||||||
# Add custom target to run compute++ and generate the integration header
|
# Add custom target to run compute++ and generate the integration header
|
||||||
__build_spir(${targetName} ${sourceFile} ${binaryDir})
|
foreach(sourceFile ${sourceFiles})
|
||||||
|
__build_spir(${targetName} ${sourceFile} ${binaryDir} ${fileCounter})
|
||||||
|
MATH(EXPR fileCounter "${fileCounter} + 1")
|
||||||
|
endforeach()
|
||||||
|
|
||||||
# Link with the ComputeCpp runtime library
|
# Link with the ComputeCpp runtime library
|
||||||
target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY}
|
target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY}
|
||||||
|
152
cmake/FindTriSYCL.cmake
Normal file
152
cmake/FindTriSYCL.cmake
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
#.rst:
|
||||||
|
# FindTriSYCL
|
||||||
|
#---------------
|
||||||
|
#
|
||||||
|
# TODO : insert Copyright and licence
|
||||||
|
|
||||||
|
#########################
|
||||||
|
# FindTriSYCL.cmake
|
||||||
|
#########################
|
||||||
|
#
|
||||||
|
# Tools for finding and building with TriSYCL.
|
||||||
|
#
|
||||||
|
# User must define TRISYCL_INCLUDE_DIR pointing to the triSYCL
|
||||||
|
# include directory.
|
||||||
|
#
|
||||||
|
# Latest version of this file can be found at:
|
||||||
|
# https://github.com/triSYCL/triSYCL
|
||||||
|
|
||||||
|
# Require CMake version 3.5 or higher
|
||||||
|
cmake_minimum_required (VERSION 3.5)
|
||||||
|
|
||||||
|
# Check that a supported host compiler can be found
|
||||||
|
if(CMAKE_COMPILER_IS_GNUCXX)
|
||||||
|
# Require at least gcc 5.4
|
||||||
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4)
|
||||||
|
message(FATAL_ERROR
|
||||||
|
"host compiler - Not found! (gcc version must be at least 5.4)")
|
||||||
|
else()
|
||||||
|
message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}")
|
||||||
|
endif()
|
||||||
|
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||||
|
# Require at least clang 3.9
|
||||||
|
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.9)
|
||||||
|
message(FATAL_ERROR
|
||||||
|
"host compiler - Not found! (clang version must be at least 3.9)")
|
||||||
|
else()
|
||||||
|
message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}")
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
message(WARNING
|
||||||
|
"host compiler - Not found! (triSYCL supports GCC and Clang)")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
#triSYCL options
|
||||||
|
option(TRISYCL_OPENMP "triSYCL multi-threading with OpenMP" ON)
|
||||||
|
option(TRISYCL_OPENCL "triSYCL OpenCL interoperability mode" OFF)
|
||||||
|
option(TRISYCL_NO_ASYNC "triSYCL use synchronous kernel execution" OFF)
|
||||||
|
option(TRISYCL_DEBUG "triSCYL use debug mode" OFF)
|
||||||
|
option(TRISYCL_DEBUG_STRUCTORS "triSYCL trace of object lifetimes" OFF)
|
||||||
|
option(TRISYCL_TRACE_KERNEL "triSYCL trace of kernel execution" OFF)
|
||||||
|
|
||||||
|
mark_as_advanced(TRISYCL_OPENMP)
|
||||||
|
mark_as_advanced(TRISYCL_OPENCL)
|
||||||
|
mark_as_advanced(TRISYCL_NO_ASYNC)
|
||||||
|
mark_as_advanced(TRISYCL_DEBUG)
|
||||||
|
mark_as_advanced(TRISYCL_DEBUG_STRUCTORS)
|
||||||
|
mark_as_advanced(TRISYCL_TRACE_KERNEL)
|
||||||
|
|
||||||
|
#triSYCL definitions
|
||||||
|
# User-overridable cache entry holding the host-side SYCL language version.
# The cache type must be one of BOOL/FILEPATH/PATH/STRING/INTERNAL; "VERSION"
# is not a valid type, so STRING is used explicitly.
set(CL_SYCL_LANGUAGE_VERSION 220 CACHE STRING
  "Host language version to be used by trisYCL (default is: 220)")
|
||||||
|
# User-overridable cache entry holding the device-side language version.
# The cache type must be one of BOOL/FILEPATH/PATH/STRING/INTERNAL; "VERSION"
# is not a valid type, so STRING is used explicitly.
set(TRISYCL_CL_LANGUAGE_VERSION 220 CACHE STRING
  "Device language version to be used by trisYCL (default is: 220)")
|
||||||
|
#set(TRISYCL_COMPILE_OPTIONS "-std=c++1z -Wall -Wextra")
|
||||||
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
|
# Make the C++ standard selected via CMAKE_CXX_STANDARD a hard requirement.
# CMAKE_CXX_STANDARD_REQUIRED is the variable CMake uses to initialise each
# target's CXX_STANDARD_REQUIRED property; a plain variable named
# CXX_STANDARD_REQUIRED is not read by CMake and has no effect.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
|
||||||
|
# Find OpenCL package
|
||||||
|
if(TRISYCL_OPENCL)
|
||||||
|
find_package(OpenCL REQUIRED)
|
||||||
|
if(UNIX)
|
||||||
|
set(BOOST_COMPUTE_INCPATH /usr/include/compute CACHE PATH
|
||||||
|
"Path to Boost.Compute headers (default is: /usr/include/compute)")
|
||||||
|
endif(UNIX)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Find OpenMP package
|
||||||
|
if(TRISYCL_OPENMP)
|
||||||
|
find_package(OpenMP REQUIRED)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Find Boost
|
||||||
|
find_package(Boost 1.58 REQUIRED COMPONENTS chrono log)
|
||||||
|
|
||||||
|
# If debug or trace we need boost log
|
||||||
|
if(TRISYCL_DEBUG OR TRISYCL_DEBUG_STRUCTORS OR TRISYCL_TRACE_KERNEL)
|
||||||
|
set(LOG_NEEDED ON)
|
||||||
|
else()
|
||||||
|
set(LOG_NEEDED OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
find_package(Threads REQUIRED)
|
||||||
|
|
||||||
|
# Find triSYCL directory
|
||||||
|
if(NOT TRISYCL_INCLUDE_DIR)
|
||||||
|
# Abort configuration: TRISYCL_INCLUDE_DIR was not provided by the user.
message(FATAL_ERROR
  "triSYCL include directory - Not found! (please set TRISYCL_INCLUDE_DIR)")
|
||||||
|
else()
|
||||||
|
message(STATUS "triSYCL include directory - Found ${TRISYCL_INCLUDE_DIR}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
#######################
|
||||||
|
# add_sycl_to_target
|
||||||
|
#######################
|
||||||
|
#
|
||||||
|
# Sets the proper flags and includes for the target compilation.
|
||||||
|
#
|
||||||
|
# targetName : Name of the target to add a SYCL to.
|
||||||
|
# sourceFile : Source file to be compiled for SYCL.
|
||||||
|
# binaryDir : Intermediate directory to output the integration header.
|
||||||
|
#
|
||||||
|
function(add_sycl_to_target targetName sourceFile binaryDir)
|
||||||
|
|
||||||
|
# Add include directories to the "#include <>" paths
|
||||||
|
target_include_directories (${targetName} PUBLIC
|
||||||
|
${TRISYCL_INCLUDE_DIR}
|
||||||
|
${Boost_INCLUDE_DIRS}
|
||||||
|
$<$<BOOL:${TRISYCL_OPENCL}>:${OpenCL_INCLUDE_DIRS}>
|
||||||
|
$<$<BOOL:${TRISYCL_OPENCL}>:${BOOST_COMPUTE_INCPATH}>)
|
||||||
|
|
||||||
|
|
||||||
|
# Link dependencies
|
||||||
|
target_link_libraries(${targetName} PUBLIC
|
||||||
|
$<$<BOOL:${TRISYCL_OPENCL}>:${OpenCL_LIBRARIES}>
|
||||||
|
Threads::Threads
|
||||||
|
$<$<BOOL:${LOG_NEEDED}>:Boost::log>
|
||||||
|
Boost::chrono)
|
||||||
|
|
||||||
|
|
||||||
|
# Compile definitions
|
||||||
|
target_compile_definitions(${targetName} PUBLIC
|
||||||
|
$<$<BOOL:${TRISYCL_NO_ASYNC}>:TRISYCL_NO_ASYNC>
|
||||||
|
$<$<BOOL:${TRISYCL_OPENCL}>:TRISYCL_OPENCL>
|
||||||
|
$<$<BOOL:${TRISYCL_DEBUG}>:TRISYCL_DEBUG>
|
||||||
|
$<$<BOOL:${TRISYCL_DEBUG_STRUCTORS}>:TRISYCL_DEBUG_STRUCTORS>
|
||||||
|
$<$<BOOL:${TRISYCL_TRACE_KERNEL}>:TRISYCL_TRACE_KERNEL>
|
||||||
|
$<$<BOOL:${LOG_NEEDED}>:BOOST_LOG_DYN_LINK>)
|
||||||
|
|
||||||
|
# C++ and OpenMP requirements
|
||||||
|
target_compile_options(${targetName} PUBLIC
|
||||||
|
${TRISYCL_COMPILE_OPTIONS}
|
||||||
|
$<$<BOOL:${TRISYCL_OPENMP}>:${OpenMP_CXX_FLAGS}>)
|
||||||
|
|
||||||
|
if(${TRISYCL_OPENMP} AND (NOT WIN32))
|
||||||
|
# Does not support generator expressions
|
||||||
|
set_target_properties(${targetName}
|
||||||
|
PROPERTIES
|
||||||
|
LINK_FLAGS ${OpenMP_CXX_FLAGS})
|
||||||
|
endif(${TRISYCL_OPENMP} AND (NOT WIN32))
|
||||||
|
|
||||||
|
endfunction(add_sycl_to_target)
|
@ -19,7 +19,7 @@
|
|||||||
#undef isnan
|
#undef isnan
|
||||||
#undef isinf
|
#undef isinf
|
||||||
#undef isfinite
|
#undef isfinite
|
||||||
#include <SYCL/sycl.hpp>
|
#include <CL/sycl.hpp>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
@ -195,7 +195,7 @@ typename HostExpr::Index LocalThreadSizeM, typename HostExpr::Index LocalThreadS
|
|||||||
m_j_strides(m_j_strides_), m_right_nocontract_strides(m_right_nocontract_strides_),
|
m_j_strides(m_j_strides_), m_right_nocontract_strides(m_right_nocontract_strides_),
|
||||||
left_tuple_of_accessors(left_tuple_of_accessors_), right_tuple_of_accessors(right_tuple_of_accessors_), dev(dev_){}
|
left_tuple_of_accessors(left_tuple_of_accessors_), right_tuple_of_accessors(right_tuple_of_accessors_), dev(dev_){}
|
||||||
|
|
||||||
void operator()(cl::sycl::nd_item<1> itemID) {
|
void operator()(cl::sycl::nd_item<2> itemID) {
|
||||||
typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<HostExpr>::Type DevExpr;
|
typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<HostExpr>::Type DevExpr;
|
||||||
typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<LHSHostExpr>::Type LHSDevExpr;
|
typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<LHSHostExpr>::Type LHSDevExpr;
|
||||||
typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<RHSHostExpr>::Type RHSDevExpr;
|
typedef typename Eigen::TensorSycl::internal::ConvertToDeviceExpression<RHSHostExpr>::Type RHSDevExpr;
|
||||||
|
@ -14,7 +14,23 @@
|
|||||||
|
|
||||||
#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H)
|
#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H)
|
||||||
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
|
#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H
|
||||||
|
template<size_t Align> struct CheckAlignStatically {
|
||||||
|
static const bool Val= (((Align&(Align-1))==0) && (Align >= sizeof(void *)));
|
||||||
|
};
|
||||||
|
template <bool IsAligned, size_t Align>
|
||||||
|
struct Conditional_Allocate {
|
||||||
|
|
||||||
|
EIGEN_ALWAYS_INLINE static void* conditional_allocate(std::size_t elements) {
|
||||||
|
return aligned_alloc(Align, elements);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template <size_t Align>
|
||||||
|
struct Conditional_Allocate<false, Align> {
|
||||||
|
|
||||||
|
EIGEN_ALWAYS_INLINE static void* conditional_allocate(std::size_t elements){
|
||||||
|
return malloc(elements);
|
||||||
|
}
|
||||||
|
};
|
||||||
template <typename Scalar, size_t Align = EIGEN_MAX_ALIGN_BYTES, class Allocator = std::allocator<Scalar>>
|
template <typename Scalar, size_t Align = EIGEN_MAX_ALIGN_BYTES, class Allocator = std::allocator<Scalar>>
|
||||||
struct SyclAllocator {
|
struct SyclAllocator {
|
||||||
typedef Scalar value_type;
|
typedef Scalar value_type;
|
||||||
@ -22,7 +38,9 @@ struct SyclAllocator {
|
|||||||
typedef typename std::allocator_traits<Allocator>::size_type size_type;
|
typedef typename std::allocator_traits<Allocator>::size_type size_type;
|
||||||
|
|
||||||
SyclAllocator( ){};
|
SyclAllocator( ){};
|
||||||
Scalar* allocate(std::size_t elements) { return static_cast<Scalar*>(aligned_alloc(Align, elements)); }
|
Scalar* allocate(std::size_t elements) {
|
||||||
|
return static_cast<Scalar*>(Conditional_Allocate<CheckAlignStatically<Align>::Val, Align>::conditional_allocate(elements));
|
||||||
|
}
|
||||||
void deallocate(Scalar * p, std::size_t size) { EIGEN_UNUSED_VARIABLE(size); free(p); }
|
void deallocate(Scalar * p, std::size_t size) { EIGEN_UNUSED_VARIABLE(size); free(p); }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -81,28 +99,26 @@ struct memsetCghFunctor{
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
//get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU and intel GPU)
|
//get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU and intel GPU)
|
||||||
EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){
|
EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){
|
||||||
auto devices = cl::sycl::device::get_devices();
|
std::vector<cl::sycl::device> supported_devices;
|
||||||
std::vector<cl::sycl::device>::iterator it =devices.begin();
|
auto plafrom_list =cl::sycl::platform::get_platforms();
|
||||||
while(it!=devices.end()) {
|
for(const auto& platform : plafrom_list){
|
||||||
///FIXME: Currently there is a bug in amd cpu OpenCL
|
auto device_list = platform.get_devices();
|
||||||
auto name = (*it).template get_info<cl::sycl::info::device::name>();
|
auto platform_name =platform.template get_info<cl::sycl::info::platform::name>();
|
||||||
std::transform(name.begin(), name.end(), name.begin(), ::tolower);
|
std::transform(platform_name.begin(), platform_name.end(), platform_name.begin(), ::tolower);
|
||||||
auto vendor = (*it).template get_info<cl::sycl::info::device::vendor>();
|
for(const auto& device : device_list){
|
||||||
|
auto vendor = device.template get_info<cl::sycl::info::device::vendor>();
|
||||||
std::transform(vendor.begin(), vendor.end(), vendor.begin(), ::tolower);
|
std::transform(vendor.begin(), vendor.end(), vendor.begin(), ::tolower);
|
||||||
|
bool unsuported_condition = (device.is_cpu() && platform_name.find("amd")!=std::string::npos && vendor.find("apu") == std::string::npos) ||
|
||||||
if((*it).is_cpu() && vendor.find("amd")!=std::string::npos && vendor.find("apu") == std::string::npos){ // remove amd cpu as it is not supported by computecpp allow APUs
|
(device.is_gpu() && platform_name.find("intel")!=std::string::npos);
|
||||||
it = devices.erase(it);
|
if(!unsuported_condition){
|
||||||
//FIXME: currently there is a bug in intel gpu driver regarding memory allignment issue.
|
std::cout << "Platform name "<< platform_name << std::endl;
|
||||||
}else if((*it).is_gpu() && name.find("intel")!=std::string::npos){
|
supported_devices.push_back(device);
|
||||||
it = devices.erase(it);
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
++it;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return devices;
|
}
|
||||||
|
return supported_devices;
|
||||||
}
|
}
|
||||||
|
|
||||||
class QueueInterface {
|
class QueueInterface {
|
||||||
|
@ -152,33 +152,40 @@ endif()
|
|||||||
|
|
||||||
if(EIGEN_TEST_CXX11)
|
if(EIGEN_TEST_CXX11)
|
||||||
if(EIGEN_TEST_SYCL)
|
if(EIGEN_TEST_SYCL)
|
||||||
ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11")
|
if(EIGEN_SYCL_TRISYCL)
|
||||||
ei_add_test_sycl(cxx11_tensor_forced_eval_sycl "-std=c++11")
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11")
|
set(STD_CXX_FLAG "-std=c++1z")
|
||||||
ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
|
else(EIGEN_SYCL_TRISYCL)
|
||||||
ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
|
# It should be safe to always run these tests as there is some fallback code for
|
||||||
ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
|
# older compiler that don't support cxx11.
|
||||||
ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11")
|
set(CMAKE_CXX_STANDARD 11)
|
||||||
ei_add_test_sycl(cxx11_tensor_padding_sycl "-std=c++11")
|
set(STD_CXX_FLAG "-std=c++11")
|
||||||
ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
|
endif(EIGEN_SYCL_TRISYCL)
|
||||||
ei_add_test_sycl(cxx11_tensor_contract_sycl "-std=c++11")
|
|
||||||
ei_add_test_sycl(cxx11_tensor_concatenation_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_reverse_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_forced_eval_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_broadcast_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_device_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_reduction_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_layout_swap_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_morphing_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_inflation_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_shuffling_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_generator_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_padding_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_patch_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_builtins_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_image_patch_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_contract_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_volume_patcP_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_concatenation_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_argmax_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_reverse_sycl ${STD_CXX_FLAG})
|
||||||
ei_add_test_sycl(cxx11_tensor_custom_op_sycl "-std=c++11")
|
ei_add_test_sycl(cxx11_tensor_convolution_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_striding_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_chipping_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_layout_swap_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_inflation_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_generator_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_patch_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_image_patch_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_volume_patch_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_argmax_sycl ${STD_CXX_FLAG})
|
||||||
|
ei_add_test_sycl(cxx11_tensor_custom_op_sycl ${STD_CXX_FLAG})
|
||||||
endif(EIGEN_TEST_SYCL)
|
endif(EIGEN_TEST_SYCL)
|
||||||
# It should be safe to always run these tests as there is some fallback code for
|
|
||||||
# older compiler that don't support cxx11.
|
|
||||||
set(CMAKE_CXX_STANDARD 11)
|
|
||||||
|
|
||||||
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
||||||
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user