diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000..04d8697666 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,41 @@ + +cmake_minimum_required(VERSION 3.17) +project(Torch-TensorRT LANGUAGES CXX) + +# Use C++17 +set(CMAKE_CXX_STANDARD 17) + +# Build the libraries with -fPIC +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +if (DEFINED CMAKE_MODULE_PATH) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} CACHE PATH "Path to the folder containing finders") +endif() + +include(cmake/build_options.cmake) +include(cmake/paths.cmake) +include(cmake/dependencies.cmake) +if(MSVC) + add_compile_options(/wd4624 /wd4067 /permissive-) + # When using Ninja generator, suppress the warning D9025 + string(REPLACE "/Zi" "/Z7" CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG}") + string(REPLACE "/Zi" "/Z7" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") +endif() +# ----------------------------------------- +# compilation +# ----------------------------------------- +add_subdirectory(core) +add_subdirectory(cpp) + +include(CMakePackageConfigHelpers) + +configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in + "${CMAKE_CURRENT_BINARY_DIR}/torchtrtConfig.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/torchtrt +) + +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/torchtrtConfig.cmake" + # "${CMAKE_CURRENT_BINARY_DIR}/torchtrtConfigVersion.cmake" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/torchtrt +) diff --git a/Config.cmake.in b/Config.cmake.in new file mode 100644 index 0000000000..7b9f3638db --- /dev/null +++ b/Config.cmake.in @@ -0,0 +1,13 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +find_dependency(Torch) +find_package(TensorRT QUIET) +if (NOT TensorRT_FOUND) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Modules") + find_dependency(TensorRT) +endif() +include("${CMAKE_CURRENT_LIST_DIR}/torchtrtTargets.cmake") + +check_required_components(torchtrt) diff --git a/cmake/Modules/FindTensorRT.cmake b/cmake/Modules/FindTensorRT.cmake new file mode 100644 index 0000000000..016ec3064c --- /dev/null +++ b/cmake/Modules/FindTensorRT.cmake @@ -0,0 +1,130 @@ +# This module defines the following variables: +# +# :: +# +# TensorRT_INCLUDE_DIRS +# TensorRT_LIBRARIES +# TensorRT_FOUND +# +# :: +# +# TensorRT_VERSION_STRING - version (x.y.z) +# TensorRT_VERSION_MAJOR - major version (x) +# TensorRT_VERSION_MINOR - minor version (y) +# TensorRT_VERSION_PATCH - patch version (z) +# +# Hints +# ^^^^^ +# A user may set ``TensorRT_ROOT`` to an installation root to tell this module where to look. 
+# +set(_TensorRT_SEARCHES) + +if(TensorRT_ROOT) + set(_TensorRT_SEARCH_ROOT PATHS ${TensorRT_ROOT} NO_DEFAULT_PATH) + list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_ROOT) +endif() + +# appends some common paths +set(_TensorRT_SEARCH_NORMAL + PATHS "/usr" +) +list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_NORMAL) + +# Include dir +foreach(search ${_TensorRT_SEARCHES}) + find_path(TensorRT_INCLUDE_DIR NAMES NvInfer.h ${${search}} PATH_SUFFIXES include) +endforeach() + +if(NOT TensorRT_LIBRARY) + foreach(search ${_TensorRT_SEARCHES}) + find_library(TensorRT_LIBRARY NAMES nvinfer ${${search}} PATH_SUFFIXES lib) + endforeach() +endif() + +if(NOT TensorRT_nvinfer_plugin_LIBRARY) + foreach(search ${_TensorRT_SEARCHES}) + find_library(TensorRT_nvinfer_plugin_LIBRARY NAMES nvinfer_plugin ${${search}} PATH_SUFFIXES lib) + endforeach() +endif() + +mark_as_advanced(TensorRT_INCLUDE_DIR) + +if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") + + string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") + set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") +endif() + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(TensorRT REQUIRED_VARS TensorRT_LIBRARY TensorRT_INCLUDE_DIR VERSION_VAR TensorRT_VERSION_STRING) + +if(TensorRT_FOUND) + set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR}) + + if(NOT TensorRT_LIBRARIES) + set(TensorRT_LIBRARIES ${TensorRT_LIBRARY}) + if (TensorRT_nvinfer_plugin_LIBRARY) + list(APPEND TensorRT_LIBRARIES ${TensorRT_nvinfer_plugin_LIBRARY}) + endif() + endif() + + if(NOT TARGET TensorRT::TensorRT) + add_library(TensorRT INTERFACE IMPORTED) + add_library(TensorRT::TensorRT ALIAS TensorRT) + endif() + + if(NOT TARGET TensorRT::nvinfer) + add_library(TensorRT::nvinfer SHARED IMPORTED) + if (WIN32) + foreach(search ${_TensorRT_SEARCHES}) + find_file(TensorRT_LIBRARY_DLL + NAMES nvinfer.dll + PATHS ${${search}} + PATH_SUFFIXES bin + ) + endforeach() + + set_target_properties(TensorRT::nvinfer PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}" + IMPORTED_LOCATION "${TensorRT_LIBRARY_DLL}" + IMPORTED_IMPLIB "${TensorRT_LIBRARY}" + ) + else() + set_target_properties(TensorRT::nvinfer PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}" + IMPORTED_LOCATION "${TensorRT_LIBRARY}" + ) + endif() + target_link_libraries(TensorRT INTERFACE TensorRT::nvinfer) + endif() + + if(NOT TARGET TensorRT::nvinfer_plugin AND TensorRT_nvinfer_plugin_LIBRARY) + add_library(TensorRT::nvinfer_plugin SHARED IMPORTED) + if (WIN32) + foreach(search ${_TensorRT_SEARCHES}) + find_file(TensorRT_nvinfer_plugin_LIBRARY_DLL + NAMES nvinfer_plugin.dll + PATHS ${${search}} + PATH_SUFFIXES bin + ) + endforeach() + + set_target_properties(TensorRT::nvinfer_plugin PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}" + IMPORTED_LOCATION 
"${TensorRT_nvinfer_plugin_LIBRARY_DLL}" + IMPORTED_IMPLIB "${TensorRT_nvinfer_plugin_LIBRARY}" + ) + else() + set_target_properties(TensorRT::nvinfer_plugin PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}" + IMPORTED_LOCATION "${TensorRT_nvinfer_plugin_LIBRARY}" + ) + endif() + target_link_libraries(TensorRT INTERFACE TensorRT::nvinfer_plugin) + endif() +endif() diff --git a/cmake/Modules/FindcuDNN.cmake b/cmake/Modules/FindcuDNN.cmake new file mode 100644 index 0000000000..593a9fcacf --- /dev/null +++ b/cmake/Modules/FindcuDNN.cmake @@ -0,0 +1,243 @@ +# Source: +# https://github.com/arrayfire/arrayfire/blob/master/CMakeModules/FindcuDNN.cmake +# +# Fetched the original content of this file from +# https://github.com/soumith/cudnn.torch +# +# Original Copyright: +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. +# +# Copyright (c) 2021, ArrayFire +# All rights reserved. +# +# This file is distributed under 3-clause BSD license. +# The complete license agreement can be obtained at: +# http://arrayfire.com/licenses/BSD-3-Clause +# +# FindcuDNN +# ------- +# +# Find cuDNN library +# +# This module creates imported target cuDNN::cuDNN upon successfull +# lookup of cuDNN headers and libraries. +# +# Valiables that affect result: +# , , : as usual +# +# Usage +# ----- +# add_exectuable(helloworld main.cpp) +# target_link_libraries(helloworld PRIVATE cuDNN::cuDNN) +# +# Note: It is recommended to avoid using variables set by the find module. +# +# Result variables +# ---------------- +# +# This module will set the following variables in your project: +# +# ``cuDNN_INCLUDE_DIRS`` +# where to find cudnn.h. +# +# ``cuDNN_LINK_LIBRARY`` +# the libraries to link against to use cuDNN. Priot to cuDNN 8, this is a huge monolithic +# library. However, since cuDNN 8 it has been split into multiple shared libraries. If +# cuDNN version 8 if found, this variable contains the shared library that dlopens the +# other libraries: cuDNN_*_INFER_LINK_LIBRARY and cuDNN_*_TRAIN_LINK_LIBRARY as needed. +# For versions of cuDNN 7 or lower, cuDNN_*_INFER_LINK_LIBRARY and cuDNN_*_TRAIN_LINK_LIBRARY +# are not defined. +# +# ``cuDNN_ADV_INFER_LINK_LIBRARY`` +# the libraries to link directly to use advanced inference API from cuDNN. +# ``cuDNN_ADV_INFER_DLL_LIBRARY`` +# Corresponding advanced inference API Windows DLL. This is not set on non-Windows platforms. +# ``cuDNN_ADV_TRAIN_LINK_LIBRARY`` +# the libraries to link directly to use advanced training API from cuDNN. +# ``cuDNN_ADV_TRAIN_DLL_LIBRARY`` +# Corresponding advanced training API Windows DLL. This is not set on non-Windows platforms. +# +# ``cuDNN_CNN_INFER_LINK_LIBRARY`` +# the libraries to link directly to use convolutional nueral networks inference API from cuDNN. +# ``cuDNN_CNN_INFER_DLL_LIBRARY`` +# Corresponding CNN inference API Windows DLL. This is not set on non-Windows platforms. +# ``cuDNN_CNN_TRAIN_LINK_LIBRARY`` +# the libraries to link directly to use convolutional nueral networks training API from cuDNN. +# ``cuDNN_CNN_TRAIN_DLL_LIBRARY`` +# Corresponding CNN training API Windows DLL. This is not set on non-Windows platforms. +# +# ``cuDNN_OPS_INFER_LINK_LIBRARY`` +# the libraries to link directly to use starndard ML operations API from cuDNN. +# ``cuDNN_OPS_INFER_DLL_LIBRARY`` +# Corresponding OPS inference API Windows DLL. This is not set on non-Windows platforms. 
+# ``cuDNN_OPS_TRAIN_LINK_LIBRARY`` +# the libraries to link directly to use standard ML operations training API from cuDNN. +# ``cuDNN_OPS_TRAIN_DLL_LIBRARY`` +# Corresponding OPS training API Windows DLL. This is not set on non-Windows platforms. +# +# ``cuDNN_FOUND`` +# If false, do not try to use cuDNN. +# ``cuDNN_VERSION`` +# Version of the cuDNN library found +# ``cuDNN_VERSION_MAJOR`` +# Major Version of the cuDNN library found +# ``cuDNN_VERSION_MINOR`` +# Minor Version of the cuDNN library found + +find_package(PkgConfig) +pkg_check_modules(PC_CUDNN QUIET cuDNN) + +find_package(CUDAToolkit QUIET) + +find_path(cuDNN_INCLUDE_DIRS + NAMES cudnn.h + HINTS + ${cuDNN_ROOT_DIR} + ${PC_CUDNN_INCLUDE_DIRS} + ${CUDA_TOOLKIT_INCLUDE} + PATH_SUFFIXES include + DOC "cuDNN include directory path." ) + +if(cuDNN_INCLUDE_DIRS) + file(READ ${cuDNN_INCLUDE_DIRS}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + list(LENGTH CUDNN_MAJOR_VERSION cudnn_ver_matches) + if(${cudnn_ver_matches} EQUAL 0) + file(READ ${cuDNN_INCLUDE_DIRS}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + endif() + string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" + CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") + string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" + CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" + CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" + CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" + CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") + set(cuDNN_VERSION_MAJOR ${CUDNN_MAJOR_VERSION}) + set(cuDNN_VERSION_MINOR ${CUDNN_MINOR_VERSION}) + set(cuDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}) +endif() + +# Choose lib suffix to be exact major version if requested +# otherwise, just pick the one read from cudnn.h header +if(cuDNN_FIND_VERSION_EXACT) + set(cudnn_ver_suffix "${cuDNN_FIND_VERSION_MAJOR}") +else() + set(cudnn_ver_suffix "${CUDNN_MAJOR_VERSION}") +endif() + +if(cuDNN_INCLUDE_DIRS) + get_filename_component(libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH) + + macro(af_find_cudnn_libs cudnn_lib_name_infix) + if("${cudnn_lib_name_infix}" STREQUAL "") + set(LIB_INFIX "") + else() + string(TOUPPER ${cudnn_lib_name_infix} LIB_INFIX) + endif() + find_library(cuDNN${LIB_INFIX}_LINK_LIBRARY + NAMES + libcudnn${cudnn_lib_name_infix}.so.${cudnn_ver_suffix} + libcudnn${cudnn_lib_name_infix}.${cudnn_ver_suffix}.dylib + cudnn${cudnn_lib_name_infix} + PATHS + ${cuDNN_ROOT_DIR} + ${PC_CUDNN_LIBRARY_DIRS} + $ENV{LD_LIBRARY_PATH} + ${libpath_cudart} + ${CMAKE_INSTALL_PREFIX} + PATH_SUFFIXES lib lib64 bin lib/x64 bin/x64 + DOC "cudnn${cudnn_lib_name_infix} link library." ) + mark_as_advanced(cuDNN${LIB_INFIX}_LINK_LIBRARY) + + if(WIN32 AND cuDNN_LINK_LIBRARY) + find_file(cuDNN${LIB_INFIX}_DLL_LIBRARY + NAMES cudnn${cudnn_lib_name_infix}64_${cudnn_ver_suffix}${CMAKE_SHARED_LIBRARY_SUFFIX} + PATHS + ${cuDNN_ROOT_DIR} + ${PC_CUDNN_LIBRARY_DIRS} + $ENV{PATH} + ${libpath_cudart} + ${CMAKE_INSTALL_PREFIX} + PATH_SUFFIXES lib lib64 bin lib/x64 bin/x64 + DOC "cudnn${cudnn_lib_name_infix} Windows DLL." 
) + mark_as_advanced(cuDNN${LIB_INFIX}_DLL_LIBRARY) + endif() + endmacro() + + af_find_cudnn_libs("") # gets base cudnn shared library + if(cuDNN_VERSION_MAJOR VERSION_GREATER 8 OR cuDNN_VERSION_MAJOR VERSION_EQUAL 8) + af_find_cudnn_libs("_adv_infer") + af_find_cudnn_libs("_adv_train") + af_find_cudnn_libs("_cnn_infer") + af_find_cudnn_libs("_cnn_train") + af_find_cudnn_libs("_ops_infer") + af_find_cudnn_libs("_ops_train") + endif() +endif() + +# pytorch compatibility layer +set(CUDNN_LIBRARY_PATH ${cuDNN_LINK_LIBRARY}) +set(CUDNN_INCLUDE_PATH ${cuDNN_INCLUDE_DIRS}) + +find_package_handle_standard_args(cuDNN + REQUIRED_VARS cuDNN_LINK_LIBRARY cuDNN_INCLUDE_DIRS + VERSION_VAR cuDNN_VERSION) + +mark_as_advanced(cuDNN_LINK_LIBRARY cuDNN_INCLUDE_DIRS cuDNN_DLL_LIBRARY) + +if(cuDNN_FOUND) + if(NOT TARGET cuDNN::cuDNN) + add_library(cuDNN::cuDNN SHARED IMPORTED) + if(WIN32) + set_target_properties(cuDNN::cuDNN + PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGE "C" + INTERFACE_INCLUDE_DIRECTORIES "${cuDNN_INCLUDE_DIRS}" + IMPORTED_LOCATION "${cuDNN_DLL_LIBRARY}" + IMPORTED_IMPLIB "${cuDNN_LINK_LIBRARY}" + ) + else(WIN32) + set_target_properties(cuDNN::cuDNN + PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGE "C" + INTERFACE_INCLUDE_DIRECTORIES "${cuDNN_INCLUDE_DIRS}" + IMPORTED_LOCATION "${cuDNN_LINK_LIBRARY}" + ) + endif(WIN32) + if(cuDNN_VERSION_MAJOR VERSION_GREATER 8 OR cuDNN_VERSION_MAJOR VERSION_EQUAL 8) + macro(create_cudnn_target cudnn_target_name) + string(TOUPPER ${cudnn_target_name} target_infix) + add_library(cuDNN::${cudnn_target_name} SHARED IMPORTED) + if(WIN32) + set_target_properties(cuDNN::${cudnn_target_name} + PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGE "C" + INTERFACE_INCLUDE_DIRECTORIES "${cuDNN_INCLUDE_DIRS}" + IMPORTED_LOCATION "${cuDNN_${target_infix}_DLL_LIBRARY}" + IMPORTED_IMPLIB "${cuDNN_${target_infix}_LINK_LIBRARY}" + ) + else(WIN32) + set_target_properties(cuDNN::${cudnn_target_name} + PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGE "C" + INTERFACE_INCLUDE_DIRECTORIES "${cuDNN_INCLUDE_DIRS}" + IMPORTED_LOCATION "${cuDNN_${target_infix}_LINK_LIBRARY}" + ) + endif(WIN32) + endmacro() + create_cudnn_target(adv_infer) + create_cudnn_target(adv_train) + create_cudnn_target(cnn_infer) + create_cudnn_target(cnn_train) + create_cudnn_target(ops_infer) + create_cudnn_target(ops_train) + endif() + endif(NOT TARGET cuDNN::cuDNN) +endif(cuDNN_FOUND) diff --git a/cmake/build_options.cmake b/cmake/build_options.cmake new file mode 100644 index 0000000000..210bdf6813 --- /dev/null +++ b/cmake/build_options.cmake @@ -0,0 +1,28 @@ +# set CMAKE_BUILD_TYPE default value +if(NOT CMAKE_CONFIGURATION_TYPES) + if("${CMAKE_BUILD_TYPE}" STREQUAL "") + set(CMAKE_BUILD_TYPE + "Release" + CACHE STRING "Build configuration" FORCE) + endif() +endif() + +# validate CMAKE_BUILD_TYPE against default CMake build types +set(VALID_BUILD_TYPES "Release" "Debug" "RelWithDebInfo" "MinSizeRel") +if(NOT CMAKE_CONFIGURATION_TYPES) + list(FIND VALID_BUILD_TYPES "${CMAKE_BUILD_TYPE}" INDEX) + if(${INDEX} MATCHES -1) + message( + FATAL_ERROR + "Invalid build type. 
Valid types are [${VALID_BUILD_TYPES}]") + endif() +endif() + +if(NOT CMAKE_CONFIGURATION_TYPES) + if(DEFINED CMAKE_BUILD_TYPE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS + ${VALID_BUILD_TYPES}) + endif() +endif() + + diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake new file mode 100644 index 0000000000..5d8b1c15cd --- /dev/null +++ b/cmake/dependencies.cmake @@ -0,0 +1,27 @@ +# TensorRT +find_package(TensorRT QUIET) +if (NOT TensorRT_FOUND) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules") + find_package(TensorRT REQUIRED) +endif() + +# If the custom finders are needed at this point, chances are good that they will also be needed when consuming the library +install(FILES "${CMAKE_SOURCE_DIR}/cmake/Modules/FindTensorRT.cmake" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/torchtrt/Modules") +install(FILES "${CMAKE_SOURCE_DIR}/cmake/Modules/FindcuDNN.cmake" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/torchtrt/Modules") + +# CUDA +find_package(CUDAToolkit REQUIRED) +find_package(cuDNN REQUIRED) # Headers are needed somewhere + +# libtorch +find_package(Torch REQUIRED) +find_package(Threads REQUIRED) + +add_definitions(-DTORCH_VERSION_MAJOR=${Torch_VERSION_MAJOR}) +add_definitions(-DTORCH_VERSION_MINOR=${Torch_VERSION_MINOR}) +add_definitions(-DTORCH_VERSION_PATCH=${Torch_VERSION_PATCH}) + +if (WITH_TESTS) + include(FetchContent) + include(${CMAKE_SOURCE_DIR}/third_party/googletest/googletest.cmake) +endif() diff --git a/cmake/paths.cmake b/cmake/paths.cmake new file mode 100644 index 0000000000..df07ddf63d --- /dev/null +++ b/cmake/paths.cmake @@ -0,0 +1,12 @@ +include(GNUInstallDirs) + +# Set output directory names for libraries and binaries +set(LIBRARY_OUTPUT_DIRECTORY "lib") +set(ARCHIVE_OUTPUT_DIRECTORY "lib") +set(RUNTIME_OUTPUT_DIRECTORY "bin") +set(HEADERS_OUTPUT_DIRECTORY "include") + +# Set target output directory in the build directory +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${ARCHIVE_OUTPUT_DIRECTORY}") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${LIBRARY_OUTPUT_DIRECTORY}") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${RUNTIME_OUTPUT_DIRECTORY}") \ No newline at end of file diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt new file mode 100644 index 0000000000..ed66257ee0 --- /dev/null +++ b/core/CMakeLists.txt @@ -0,0 +1,48 @@ +set(lib_name "core") +add_library(${lib_name} OBJECT) + +set(CXX_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/compiler.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/compiler.h" +) + +target_sources(${lib_name} + PRIVATE + ${CXX_SRCS} + PUBLIC + $ + $ + $ + $ + $ +) + +target_link_libraries(${lib_name} + PUBLIC + torch + TensorRT::nvinfer + core_runtime + core_conversion + core_lowering + core_partitioning + core_util_logging +) + +target_include_directories(${lib_name} + PRIVATE + "$" +) + +add_subdirectory(util) +add_subdirectory(conversion) +add_subdirectory(runtime) +add_subdirectory(lowering) +add_subdirectory(partitioning) +add_subdirectory(plugins) +add_subdirectory(ir) + +# Install
install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/") diff --git a/core/conversion/CMakeLists.txt b/core/conversion/CMakeLists.txt new file mode 100644 index 0000000000..dd8853f390 --- /dev/null +++ b/core/conversion/CMakeLists.txt @@ -0,0 +1,40 @@ +set(lib_name "core_conversion") +add_library(${lib_name} OBJECT) + +set(CXX_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/conversion.cpp" + 
"${CMAKE_CURRENT_SOURCE_DIR}/conversion_ignorelist.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/conversion.h" +) + +target_sources(${lib_name} + PRIVATE + ${CXX_SRCS} + PUBLIC + $ + $ +) + +target_link_libraries(${lib_name} + PUBLIC + TensorRT::nvinfer + torch + core_ir + core_util +) +target_include_directories(${lib_name} + PUBLIC "$" +) + +# add sublibraries +add_subdirectory(conversionctx) +add_subdirectory(converters) +add_subdirectory(evaluators) +add_subdirectory(tensorcontainer) +add_subdirectory(var) + +# Install +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/conversion") diff --git a/core/conversion/conversionctx/CMakeLists.txt b/core/conversion/conversionctx/CMakeLists.txt new file mode 100644 index 0000000000..8a642561b3 --- /dev/null +++ b/core/conversion/conversionctx/CMakeLists.txt @@ -0,0 +1,12 @@ +set(sub_lib_name "conversionctx") + +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/ConversionCtx.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/ConversionCtx.h" +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/conversion/${sub_lib_name}") \ No newline at end of file diff --git a/core/conversion/converters/CMakeLists.txt b/core/conversion/converters/CMakeLists.txt new file mode 100644 index 0000000000..c94c8fccf4 --- /dev/null +++ b/core/conversion/converters/CMakeLists.txt @@ -0,0 +1,46 @@ +set(sub_lib_name "converters") + +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/NodeConverterRegistry.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/converter_util.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/Weights.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/activation.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/batch_norm.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/cast.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/concat.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/constant.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/constant_pad.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/conv_deconv.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/cumsum.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/element_wise.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/expand.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/interpolate.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/layer_norm.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/linear.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/lstm_cell.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/matrix_multiply.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/max.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/normalize.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/pooling.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/quantization.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/reduce.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/reflection_pad.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/replication_pad.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/select.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/shuffle.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/softmax.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/squeeze.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/stack.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/topk.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/unary.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/unsqueeze.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/Weights.h" + "${CMAKE_CURRENT_SOURCE_DIR}/converters.h" + "${CMAKE_CURRENT_SOURCE_DIR}/converter_util.h" +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/conversion/${sub_lib_name}") diff --git 
a/core/conversion/evaluators/CMakeLists.txt b/core/conversion/evaluators/CMakeLists.txt new file mode 100644 index 0000000000..2285440784 --- /dev/null +++ b/core/conversion/evaluators/CMakeLists.txt @@ -0,0 +1,15 @@ +set(sub_lib_name "evaluators") + +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/NodeEvaluatorRegistry.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/aten.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/eval_util.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/prim.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/evaluators.h" +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/conversion/${sub_lib_name}") \ No newline at end of file diff --git a/core/conversion/tensorcontainer/CMakeLists.txt b/core/conversion/tensorcontainer/CMakeLists.txt new file mode 100644 index 0000000000..e92f1afa3c --- /dev/null +++ b/core/conversion/tensorcontainer/CMakeLists.txt @@ -0,0 +1,12 @@ +set(sub_lib_name "tensorcontainer") + +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/TensorContainer.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/TensorContainer.h" +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/conversion/${sub_lib_name}") \ No newline at end of file diff --git a/core/conversion/var/CMakeLists.txt b/core/conversion/var/CMakeLists.txt new file mode 100644 index 0000000000..200d093b4c --- /dev/null +++ b/core/conversion/var/CMakeLists.txt @@ -0,0 +1,13 @@ +set(sub_lib_name "var") + +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/Var.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/Var.h" + "${CMAKE_CURRENT_SOURCE_DIR}/Var_inl.h" +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/conversion/${sub_lib_name}") \ No newline at end of file diff --git a/core/ir/CMakeLists.txt b/core/ir/CMakeLists.txt new file mode 100644 index 0000000000..1560888710 --- /dev/null +++ b/core/ir/CMakeLists.txt @@ -0,0 +1,29 @@ +set(lib_name "core_ir") +add_library(${lib_name} OBJECT) + +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/Input.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/ir.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/StaticParams.cpp" + $ +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/ir.h" +) + +target_include_directories(${lib_name} + PRIVATE + "$" +) + +target_link_libraries(${lib_name} + PUBLIC + TensorRT::nvinfer + torch + PRIVATE + core_util +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/ir/") diff --git a/core/lowering/CMakeLists.txt b/core/lowering/CMakeLists.txt new file mode 100644 index 0000000000..4544ffd18d --- /dev/null +++ b/core/lowering/CMakeLists.txt @@ -0,0 +1,37 @@ +set(lib_name "core_lowering") +add_library(${lib_name} OBJECT) + +set(CXX_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/drop_unused_nodes.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/lowering.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/register_trt_placeholder_ops.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/LowerInfo.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/lowering.h" +) + +target_sources(${lib_name} + PRIVATE + ${CXX_SRCS} + $ +) + +target_include_directories(${lib_name} + PRIVATE + "$" +) + +target_link_libraries(${lib_name} + PUBLIC + torch + PRIVATE + core_util +) + +# add sublibraries +add_subdirectory(passes) + +# Install +install(FILES ${HEADER_FILES} DESTINATION 
"${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/lowering") diff --git a/core/lowering/passes/CMakeLists.txt b/core/lowering/passes/CMakeLists.txt new file mode 100644 index 0000000000..3c540f5768 --- /dev/null +++ b/core/lowering/passes/CMakeLists.txt @@ -0,0 +1,33 @@ +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/convNd_to_convolution.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/exception_elimination.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/fuse_addmm_branches.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/linear_to_addmm.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/module_fallback.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/op_aliasing.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduce_gelu.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduce_remainder.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/reduce_to.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/remove_bn_dim_check.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/remove_contiguous.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/remove_dropout.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/remove_nops.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/remove_set_attrs.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/remove_unnecessary_casts.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/silu_to_sigmoid_multiplication.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/unpack_addmm.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/unpack_batch_norm.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/unpack_hardswish.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/unpack_log_softmax.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/unpack_std.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/unpack_var.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/view_to_reshape.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/passes.h" +) + +# Install headers +set(sub_lib_name "passes") +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/lowering/${sub_lib_name}") diff --git a/core/lowering/register_trt_placeholder_ops.cpp b/core/lowering/register_trt_placeholder_ops.cpp index 17d7d3f47a..5ba8171208 100644 --- a/core/lowering/register_trt_placeholder_ops.cpp +++ b/core/lowering/register_trt_placeholder_ops.cpp @@ -10,10 +10,7 @@ c10::AliasAnalysisKind aliasAnalysisFromSchema() { RegisterOperators trt_placeholder_ops_reg({ /// Op marks a Tensor to be conveted from an Torch Tensor /// to a TRT constant Tensor - Operator( - "trt::const(Tensor val) -> Tensor", - [](Stack& stack) { /*noop*/ }, - aliasAnalysisFromSchema()), + Operator("trt::const(Tensor val) -> Tensor", [](Stack& stack) { /*noop*/ }, aliasAnalysisFromSchema()), }); } // namespace jit diff --git a/core/partitioning/CMakeLists.txt b/core/partitioning/CMakeLists.txt new file mode 100644 index 0000000000..6e12a833c0 --- /dev/null +++ b/core/partitioning/CMakeLists.txt @@ -0,0 +1,33 @@ +set(lib_name "core_partitioning") +add_library(${lib_name} OBJECT) + +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/SegmentedBlock.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/shape_analysis.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/partitioning.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/PartitionInfo.cpp" + $ + PUBLIC $ + $ +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/SegmentedBlock.h" + "${CMAKE_CURRENT_SOURCE_DIR}/shape_analysis.h" + "${CMAKE_CURRENT_SOURCE_DIR}/PartitionInfo.h" + "${CMAKE_CURRENT_SOURCE_DIR}/partitioning.h" +) + +target_include_directories(${lib_name} PUBLIC "$") +target_link_libraries(${lib_name} + PUBLIC + torch + TensorRT::nvinfer + core_ir + core_util + PRIVATE + core_conversion +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/partitioning/") diff --git 
a/core/partitioning/shape_analysis.cpp b/core/partitioning/shape_analysis.cpp index d24b1f980a..49bbab0b36 100644 --- a/core/partitioning/shape_analysis.cpp +++ b/core/partitioning/shape_analysis.cpp @@ -1,168 +1,169 @@ -#include "core/partitioning/shape_analysis.h" -#include -#include "core/util/prelude.h" -#include "torch/csrc/jit/api/module.h" -#include "torch/csrc/jit/passes/constant_pooling.h" - -namespace torch_tensorrt { -namespace core { -namespace partitioning { - -std::unordered_map generateRandomInputs( - std::unordered_map& inputs, - std::unordered_map>& types) { - // generate random inputs for running pytorch segments - std::unordered_map ivalue_map; - - uint64_t in_i = 0; - for (auto& input : inputs) { - auto cur_shape = input.second.input_shape; - std::vector shape; - shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims); - auto type_opt = types[input.first]; - auto type = at::kFloat; - if (type_opt) { - type = type_opt.value(); - } else { - LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32"); - } - auto in = at::randint(5, shape, {at::kCUDA}).to(type); - ivalue_map[input.first] = in.clone(); - in_i++; - } - return ivalue_map; -} - -void getSegmentsOutputByRunning( - SegmentedBlock& seg_block, - std::unordered_map& ivalues_maps, - const PartitionInfo& partition_info) { - // create a module to run the graph - auto g = seg_block.g(); - auto copy_g = g->copy(); - - // create tuple for multiple outputs - if (seg_block.raw_outputs().size() > 1) { - auto new_output_node = copy_g->appendNode(copy_g->createTuple(copy_g->outputs())); - for (int idx = copy_g->outputs().size() - 1; idx >= 0; --idx) { - copy_g->eraseOutput(idx); - } - - copy_g->registerOutput(new_output_node->outputs()[0]); - } - - torch::jit::script::Module cur_mod(c10::QualifiedName("module")); - - auto self = copy_g->insertInput(0, "self_1"); - self->setType(cur_mod.type()); - - auto cur_method = cur_mod._ivalue()->compilation_unit()->create_function(c10::QualifiedName("forward"), copy_g); - auto schema = util::GenerateGraphSchema(cur_method->name(), copy_g); - cur_mod.type()->addMethod(cur_method); - cur_method->setSchema(schema); - - std::vector jit_inputs_ivalues; - - // set inputs ivalues, now supports Tensor/Int to pass argumentes between different segments - for (auto& input : seg_block.raw_inputs()) { - TORCHTRT_CHECK( - ivalues_maps.count(input), - "Could not find torch::jit::Value* " << input->debugName() << " produced from " - << util::node_info(input->node()) - << " in lowering graph for mini graph input.\n"); - if (input->node()->kind() == torch::jit::prim::Param) { - jit_inputs_ivalues.push_back(ivalues_maps[input]); - } else if (input->type()->isSubtypeOf(torch::jit::TensorType::get())) { - jit_inputs_ivalues.push_back(ivalues_maps[input].toTensor()); - } else if (input->type()->isSubtypeOf(torch::jit::IntType::get())) { - jit_inputs_ivalues.push_back(ivalues_maps[input].toInt()); - } else if (input->type()->isSubtypeOf(torch::jit::BoolType::get())) { - jit_inputs_ivalues.push_back(ivalues_maps[input].toBool()); - } else if (input->type()->kind() == torch::jit::TypeKind::ListType) { - jit_inputs_ivalues.push_back(ivalues_maps[input].toList()); - } else if (input->type()->kind() == torch::jit::TypeKind::TupleType) { - jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple()); - } else if (input->type()->kind() == torch::jit::TypeKind::NumberType) { - jit_inputs_ivalues.push_back(ivalues_maps[input].toScalar()); - } 
else if (input->type()->kind() == torch::jit::TypeKind::DictType) { - jit_inputs_ivalues.push_back(ivalues_maps[input].toGenericDict()); - } else if (input->type()->kind() == torch::jit::TypeKind::DeviceObjType) { - jit_inputs_ivalues.push_back(ivalues_maps[input].toDevice()); - } else { - TORCHTRT_THROW_ERROR( - "Expected to find type " << input->type()->str() << " for value " << input->debugName() - << " but get nothing. "); - } - } - - // run segments to get outputs for later segments input shape, and other arguments such as Int - std::vector jit_results; - torch::jit::IValue jit_results_ivalues = cur_mod.forward(jit_inputs_ivalues); - - if (jit_results_ivalues.isTuple()) { - auto results = jit_results_ivalues.toTuple()->elements(); - for (auto r : results) { - jit_results.push_back(r); - } - } else { - jit_results.push_back(jit_results_ivalues); - } - - size_t idx = 0; - for (auto& output : seg_block.raw_outputs()) { - ivalues_maps[output] = jit_results[idx++]; - } - - // set input shape for each segmented block so we wil use it in conversion process - std::vector input_shapes; - std::vector input_types; - for (auto& i : seg_block.raw_inputs()) { - if (ivalues_maps[i].isTensor()) { - // set the input_shape and data_type - // we can use a temp value here instead of replacing the values in ivalues_map since we only use ivalues_map for - // shape inference - auto cur_ivalue = ivalues_maps[i]; - at::ScalarType t = cur_ivalue.toTensor().scalar_type(); - if (!partition_info.truncate_long_and_double && (t == at::kLong || t == at::kDouble)) { - TORCHTRT_THROW_ERROR( - "Unable to process subgraph input type of at::kLong/at::kDouble, try to compile model with truncate_long_and_double enabled"); - } else if (partition_info.truncate_long_and_double && t == at::kLong) { - cur_ivalue = cur_ivalue.toTensor().to(at::kInt); - LOG_WARNING("Truncating graph input type from at::kLong to at::kInt"); - } else if (partition_info.truncate_long_and_double && t == at::kDouble) { - cur_ivalue = cur_ivalue.toTensor().to(at::kFloat); - LOG_WARNING("Truncating graph input type from at::kDouble to at::kFloat"); - } - c10::optional dtype = util::optTypeMetaToTRTDataType(cur_ivalue.toTensor().dtype()); - if (dtype == c10::nullopt) { - TORCHTRT_THROW_ERROR("Unsupported input data type " << cur_ivalue.toTensor().dtype()); - } - if (cur_ivalue.toTensor().sizes().size() == 0) { - // handle Scalar types, which has sizes of [] - input_shapes.push_back(util::toVec(util::toDims(c10::List({1})))); - } else { - input_shapes.push_back(util::toVec(util::toDims(cur_ivalue.toTensor().sizes()))); - } - input_types.push_back(cur_ivalue.toTensor().scalar_type()); - } - } - - seg_block.register_inshapes(input_shapes); - seg_block.register_intypes(input_types); -} - -void runShapeAnalysis( - std::vector& segmented_blocks, - std::unordered_map& example_tensor_map, - const PartitionInfo& partition_info) { - // register every segment's input shape, and it's running output IValues - for (auto& seg_block : segmented_blocks) { - torch::jit::ConstantPooling(seg_block.g()); - getSegmentsOutputByRunning(seg_block, example_tensor_map, partition_info); - } - return; -} - -} // namespace partitioning -} // namespace core -} // namespace torch_tensorrt +#include "core/partitioning/shape_analysis.h" +#include +#include +#include "core/util/prelude.h" +#include "torch/csrc/jit/api/module.h" +#include "torch/csrc/jit/passes/constant_pooling.h" + +namespace torch_tensorrt { +namespace core { +namespace partitioning { + +std::unordered_map 
generateRandomInputs( + std::unordered_map& inputs, + std::unordered_map>& types) { + // generate random inputs for running pytorch segments + std::unordered_map ivalue_map; + + uint64_t in_i = 0; + for (auto& input : inputs) { + auto cur_shape = input.second.input_shape; + std::vector shape; + shape.insert(shape.begin(), std::begin(cur_shape.d), std::begin(cur_shape.d) + cur_shape.nbDims); + auto type_opt = types[input.first]; + auto type = at::kFloat; + if (type_opt) { + type = type_opt.value(); + } else { + LOG_WARNING("Input type for doing shape analysis could not be determined, defaulting to F32"); + } + auto in = at::randint(5, shape, {at::kCUDA}).to(type); + ivalue_map[input.first] = in.clone(); + in_i++; + } + return ivalue_map; +} + +void getSegmentsOutputByRunning( + SegmentedBlock& seg_block, + std::unordered_map& ivalues_maps, + const PartitionInfo& partition_info) { + // create a module to run the graph + auto g = seg_block.g(); + auto copy_g = g->copy(); + + // create tuple for multiple outputs + if (seg_block.raw_outputs().size() > 1) { + auto new_output_node = copy_g->appendNode(copy_g->createTuple(copy_g->outputs())); + for (int idx = copy_g->outputs().size() - 1; idx >= 0; --idx) { + copy_g->eraseOutput(idx); + } + + copy_g->registerOutput(new_output_node->outputs()[0]); + } + + torch::jit::script::Module cur_mod(c10::QualifiedName("module")); + + auto self = copy_g->insertInput(0, "self_1"); + self->setType(cur_mod.type()); + + auto cur_method = cur_mod._ivalue()->compilation_unit()->create_function(c10::QualifiedName("forward"), copy_g); + auto schema = util::GenerateGraphSchema(cur_method->name(), copy_g); + cur_mod.type()->addMethod(cur_method); + cur_method->setSchema(schema); + + std::vector jit_inputs_ivalues; + + // set inputs ivalues, now supports Tensor/Int to pass arguments between different segments + for (auto& input : seg_block.raw_inputs()) { + TORCHTRT_CHECK( + ivalues_maps.count(input), + "Could not find torch::jit::Value* " << input->debugName() << " produced from " + << util::node_info(input->node()) + << " in lowering graph for mini graph input.\n"); + if (input->node()->kind() == torch::jit::prim::Param) { + jit_inputs_ivalues.push_back(ivalues_maps[input]); + } else if (input->type()->isSubtypeOf(torch::jit::TensorType::get())) { + jit_inputs_ivalues.push_back(ivalues_maps[input].toTensor()); + } else if (input->type()->isSubtypeOf(torch::jit::IntType::get())) { + jit_inputs_ivalues.push_back(ivalues_maps[input].toInt()); + } else if (input->type()->isSubtypeOf(torch::jit::BoolType::get())) { + jit_inputs_ivalues.push_back(ivalues_maps[input].toBool()); + } else if (input->type()->kind() == torch::jit::TypeKind::ListType) { + jit_inputs_ivalues.push_back(ivalues_maps[input].toList()); + } else if (input->type()->kind() == torch::jit::TypeKind::TupleType) { + jit_inputs_ivalues.push_back(ivalues_maps[input].toTuple()); + } else if (input->type()->kind() == torch::jit::TypeKind::NumberType) { + jit_inputs_ivalues.push_back(ivalues_maps[input].toScalar()); + } else if (input->type()->kind() == torch::jit::TypeKind::DictType) { + jit_inputs_ivalues.push_back(ivalues_maps[input].toGenericDict()); + } else if (input->type()->kind() == torch::jit::TypeKind::DeviceObjType) { + jit_inputs_ivalues.push_back(ivalues_maps[input].toDevice()); + } else { + TORCHTRT_THROW_ERROR( + "Expected to find type " << input->type()->str() << " for value " << input->debugName() + << " but got nothing. 
"); + } + } + + // run segments to get outputs for later segments input shape, and other arguments such as Int + std::vector jit_results; + torch::jit::IValue jit_results_ivalues = cur_mod.forward(jit_inputs_ivalues); + + if (jit_results_ivalues.isTuple()) { + auto results = jit_results_ivalues.toTuple()->elements(); + for (auto r : results) { + jit_results.push_back(r); + } + } else { + jit_results.push_back(jit_results_ivalues); + } + + size_t idx = 0; + for (auto& output : seg_block.raw_outputs()) { + ivalues_maps[output] = jit_results[idx++]; + } + + // set input shape for each segmented block so we wil use it in conversion process + std::vector input_shapes; + std::vector input_types; + for (auto& i : seg_block.raw_inputs()) { + if (ivalues_maps[i].isTensor()) { + // set the input_shape and data_type + // we can use a temp value here instead of replacing the values in ivalues_map since we only use ivalues_map for + // shape inference + auto cur_ivalue = ivalues_maps[i]; + at::ScalarType t = cur_ivalue.toTensor().scalar_type(); + if (!partition_info.truncate_long_and_double && (t == at::kLong || t == at::kDouble)) { + TORCHTRT_THROW_ERROR( + "Unable to process subgraph input type of at::kLong/at::kDouble, try to compile model with truncate_long_and_double enabled"); + } else if (partition_info.truncate_long_and_double && t == at::kLong) { + cur_ivalue = cur_ivalue.toTensor().to(at::kInt); + LOG_WARNING("Truncating graph input type from at::kLong to at::kInt"); + } else if (partition_info.truncate_long_and_double && t == at::kDouble) { + cur_ivalue = cur_ivalue.toTensor().to(at::kFloat); + LOG_WARNING("Truncating graph input type from at::kDouble to at::kFloat"); + } + c10::optional dtype = util::optTypeMetaToTRTDataType(cur_ivalue.toTensor().dtype()); + if (dtype == c10::nullopt) { + TORCHTRT_THROW_ERROR("Unsupported input data type " << cur_ivalue.toTensor().dtype()); + } + if (cur_ivalue.toTensor().sizes().size() == 0) { + // handle Scalar types, which has sizes of [] + input_shapes.push_back(util::toVec(util::toDims(c10::List({1})))); + } else { + input_shapes.push_back(util::toVec(util::toDims(cur_ivalue.toTensor().sizes()))); + } + input_types.push_back(cur_ivalue.toTensor().scalar_type()); + } + } + + seg_block.register_inshapes(input_shapes); + seg_block.register_intypes(input_types); +} + +void runShapeAnalysis( + std::vector& segmented_blocks, + std::unordered_map& example_tensor_map, + const PartitionInfo& partition_info) { + // register every segment's input shape, and it's running output IValues + for (auto& seg_block : segmented_blocks) { + torch::jit::ConstantPooling(seg_block.g()); + getSegmentsOutputByRunning(seg_block, example_tensor_map, partition_info); + } + return; +} + +} // namespace partitioning +} // namespace core +} // namespace torch_tensorrt diff --git a/core/plugins/CMakeLists.txt b/core/plugins/CMakeLists.txt new file mode 100644 index 0000000000..1f537ec306 --- /dev/null +++ b/core/plugins/CMakeLists.txt @@ -0,0 +1,38 @@ +set(lib_name "core_plugins") +add_library(${lib_name} OBJECT) + +target_sources(${lib_name} + PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/impl/interpolate_plugin.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/normalize_plugin.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/register_plugins.cpp" + PUBLIC $ +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/plugins.h" +) + +target_include_directories(${lib_name} + PUBLIC + "$" +) + +target_link_libraries(${lib_name} + PUBLIC + TensorRT::nvinfer + TensorRT::nvinfer_plugin + torch + core_util + PRIVATE + 
Threads::Threads +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/plugins/") +install( + FILES + "${CMAKE_CURRENT_SOURCE_DIR}/impl/interpolate_plugin.h" + "${CMAKE_CURRENT_SOURCE_DIR}/impl/normalize_plugin.h" + DESTINATION + "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/plugins/impl" +) diff --git a/core/runtime/CMakeLists.txt b/core/runtime/CMakeLists.txt new file mode 100644 index 0000000000..953f76c903 --- /dev/null +++ b/core/runtime/CMakeLists.txt @@ -0,0 +1,35 @@ +set(lib_name "core_runtime") +add_library(${lib_name} OBJECT) + +set(CXX_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/CudaDevice.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/DeviceList.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/execute_engine.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/TRTEngine.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/register_jit_hooks.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/runtime.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/runtime.h" +) + +target_sources(${lib_name} + PRIVATE ${CXX_SRCS} + $ +) + +target_include_directories(${lib_name} + PUBLIC + "$" +) + +target_link_libraries(${lib_name} + PUBLIC + TensorRT::nvinfer + torch + core_util +) + +# Install +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/runtime") diff --git a/core/runtime/runtime.h b/core/runtime/runtime.h index 635e7acc59..22393954d4 100644 --- a/core/runtime/runtime.h +++ b/core/runtime/runtime.h @@ -26,6 +26,8 @@ struct CudaDevice { CudaDevice(); CudaDevice(int64_t gpu_id, nvinfer1::DeviceType device_type); CudaDevice(std::string serialized_device_info); + ~CudaDevice() = default; + CudaDevice(const CudaDevice& other) = default; CudaDevice& operator=(const CudaDevice& other); std::string serialize(); std::string getSMCapability() const; diff --git a/core/util/CMakeLists.txt b/core/util/CMakeLists.txt new file mode 100644 index 0000000000..317c6e87c2 --- /dev/null +++ b/core/util/CMakeLists.txt @@ -0,0 +1,40 @@ +set(lib_name "core_util") +add_library(${lib_name} OBJECT) + +set(CXX_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/Exception.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/trt_util.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/Exception.h" + "${CMAKE_CURRENT_SOURCE_DIR}/build_info.h" + "${CMAKE_CURRENT_SOURCE_DIR}/jit_util.h" + "${CMAKE_CURRENT_SOURCE_DIR}/macros.h" + "${CMAKE_CURRENT_SOURCE_DIR}/prelude.h" + "${CMAKE_CURRENT_SOURCE_DIR}/trt_util.h" +) + +target_sources(${lib_name} + PRIVATE + ${CXX_SRCS} + PUBLIC + "$" +) + +target_include_directories(${lib_name} + PUBLIC + "$" +) + +target_link_libraries(${lib_name} + PUBLIC + TensorRT::nvinfer + torch + core_util_logging +) + +# Install +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/util") + +add_subdirectory(logging) \ No newline at end of file diff --git a/core/util/logging/CMakeLists.txt b/core/util/logging/CMakeLists.txt new file mode 100644 index 0000000000..31374d8141 --- /dev/null +++ b/core/util/logging/CMakeLists.txt @@ -0,0 +1,25 @@ +set(lib_name "core_util_logging") +add_library(${lib_name} OBJECT) + +target_sources(${lib_name} + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/TorchTRTLogger.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/TorchTRTLogger.h" +) + +target_include_directories(${lib_name} + PRIVATE + "$" +) + +target_link_libraries(${lib_name} + PUBLIC + TensorRT::nvinfer + torch +) + +# Install headers +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/core/util/logging") diff --git 
a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt new file mode 100644 index 0000000000..a0380fe5ce --- /dev/null +++ b/cpp/CMakeLists.txt @@ -0,0 +1,157 @@ +set(lib_name "torch_tensorrt") +add_library(${lib_name} OBJECT) + +set(CXX_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp" +) + +set(HEADER_FILES + "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h" + "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h" +) + +target_sources(${lib_name} + PRIVATE + ${CXX_SRCS} + $ +) + +target_link_libraries(${lib_name} + PUBLIC + torch + TensorRT::nvinfer + core +) + +target_compile_definitions(${lib_name} PUBLIC "USE_CMAKE_GENERATED_EXPORT_HEADER") + +include("GenerateExportHeader") +generate_export_header(${lib_name} + EXPORT_MACRO_NAME TORCHTRT_API + NO_EXPORT_MACRO_NAME TORCHTRT_HIDDEN +) + +target_include_directories(${lib_name} + PUBLIC + "$" + "$" # _export.h + PRIVATE + "$" +) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${lib_name}_export.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + +######################## +# API library torchtrt # +######################## +set(CMAKE_INSTALL_RPATH "$ORIGIN") + +set(torchtrt_lib_name "torchtrt") + +add_library(${torchtrt_lib_name} SHARED) + +if(NOT MSVC) + set_target_properties( + ${torchtrt_lib_name} + PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE # + LINK_FLAGS "-Wl,--disable-new-dtags") +endif() + +target_sources(${torchtrt_lib_name} + PRIVATE + $ +) + +target_link_libraries(${torchtrt_lib_name} + PUBLIC + TensorRT::TensorRT + torch + PRIVATE + torch_tensorrt + core +) + +target_include_directories(${torchtrt_lib_name} + PUBLIC + # enable include from core from the tests + "$" + # API headers + "$" + "$" # _export.h + "$" +) + +install(FILES ${HEADER_FILES} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/torch_tensorrt/") + +################################ +# API library torchtrt_runtime # +################################ +set(runtime_lib_name "torchtrt_runtime") +add_library(${runtime_lib_name} SHARED) + +if(NOT MSVC) + set_target_properties( + ${runtime_lib_name} + PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE # + LINK_FLAGS "-Wl,--disable-new-dtags") +endif() + +target_sources(${runtime_lib_name} + PRIVATE + $ + $ +) + +target_link_libraries(${runtime_lib_name} + PUBLIC + TensorRT::TensorRT + torch + PRIVATE + core_runtime + core_plugins +) + +################################ +# API library torchtrt_plugins # +################################ +set(plugins_lib_name "torchtrt_plugins") +add_library(${plugins_lib_name} SHARED) + +if(NOT MSVC) + set_target_properties( + ${plugins_lib_name} + PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE # + LINK_FLAGS "-Wl,--disable-new-dtags") +endif() + +target_sources(${plugins_lib_name} + PRIVATE + $ +) + +target_link_libraries(${plugins_lib_name} + PUBLIC + TensorRT::TensorRT + PRIVATE + core_plugins +) + +install( + TARGETS ${torchtrt_lib_name} ${runtime_lib_name} ${plugins_lib_name} torch_tensorrt core core_runtime core_plugins core_util core_util_logging core_lowering core_partitioning core_ir core_conversion + EXPORT ${torchtrt_lib_name}Targets + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION 
"${CMAKE_INSTALL_LIBDIR}") + +install( + EXPORT ${torchtrt_lib_name}Targets + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/torchtrt + COMPONENT ${torchtrt_lib_name}Targets) + +add_subdirectory(bin) diff --git a/cpp/bin/CMakeLists.txt b/cpp/bin/CMakeLists.txt new file mode 100644 index 0000000000..6a4d68f512 --- /dev/null +++ b/cpp/bin/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(torchtrtc) diff --git a/cpp/bin/torchtrtc/CMakeLists.txt b/cpp/bin/torchtrtc/CMakeLists.txt new file mode 100644 index 0000000000..0ebfd87609 --- /dev/null +++ b/cpp/bin/torchtrtc/CMakeLists.txt @@ -0,0 +1,25 @@ +set(executable_name "torchtrtc") + +add_executable(${executable_name} + ${CMAKE_CURRENT_SOURCE_DIR}/accuracy.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/fileio.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/parser_util.cpp +) + +if (MSVC) + target_link_libraries(${executable_name} PRIVATE torch torchtrt) +else() + target_link_libraries(${executable_name} PRIVATE torch "-Wl,--no-as-needed" torchtrt "-Wl,--as-needed") + set_target_properties( + ${executable_name} + PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE # + LINK_FLAGS "-Wl,--disable-new-dtags") +endif() + +target_include_directories(${executable_name} + PUBLIC + "$" +) + +install(TARGETS ${executable_name} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") diff --git a/cpp/include/torch_tensorrt/macros.h b/cpp/include/torch_tensorrt/macros.h index 2ce702ce43..7205570d0d 100644 --- a/cpp/include/torch_tensorrt/macros.h +++ b/cpp/include/torch_tensorrt/macros.h @@ -7,6 +7,9 @@ */ #pragma once +#if defined(USE_CMAKE_GENERATED_EXPORT_HEADER) +#include +#else #if defined(__GNUC__) #define TORCHTRT_API __attribute__((__visibility__("default"))) #define TORCHTRT_HIDDEN __attribute__((__visibility__("hidden"))) @@ -14,6 +17,7 @@ #define TORCHTRT_API #define TORCHTRT_HIDDEN #endif // defined(__GNUC__) +#endif // defined(USE_CMAKE_GENERATED_EXPORT_HEADER) // Does this need to be gaurded or something? 
#define XSTR(x) #x @@ -31,4 +35,4 @@ namespace torch_tensorrt { namespace torchscript {} namespace ts = torchscript; } // namespace torch_tensorrt -namespace torchtrt = torch_tensorrt; \ No newline at end of file +namespace torchtrt = torch_tensorrt; diff --git a/cpp/include/torch_tensorrt/ptq.h b/cpp/include/torch_tensorrt/ptq.h index 9c82bcba38..e928fa6856 100644 --- a/cpp/include/torch_tensorrt/ptq.h +++ b/cpp/include/torch_tensorrt/ptq.h @@ -18,6 +18,7 @@ #include "NvInfer.h" #include "torch/torch.h" #include "torch_tensorrt/logging.h" +#include "torch_tensorrt/macros.h" #ifndef DOXYGEN_SHOULD_SKIP_THIS namespace nvinfer1 { @@ -27,7 +28,7 @@ class IInt8EntropyCalibrator2; namespace torch_tensorrt { namespace ptq { -bool get_batch_impl(void* bindings[], const char* names[], int nbBindings, torch::Tensor& data); +TORCHTRT_API bool get_batch_impl(void* bindings[], const char* names[], int nbBindings, torch::Tensor& data); } } // namespace torch_tensorrt #endif // DOXYGEN_SHOULD_SKIP_THIS @@ -314,7 +315,7 @@ class Int8CacheCalibrator : Algorithm { */ template -TORCHTRT_API inline Int8Calibrator make_int8_calibrator( +inline Int8Calibrator make_int8_calibrator( DataLoader dataloader, const std::string& cache_file_path, bool use_cache) { @@ -348,7 +349,7 @@ TORCHTRT_API inline Int8Calibrator make_int8_calibrator( * @return Int8CacheCalibrator */ template -TORCHTRT_API inline Int8CacheCalibrator make_int8_cache_calibrator(const std::string& cache_file_path) { +inline Int8CacheCalibrator make_int8_cache_calibrator(const std::string& cache_file_path) { return Int8CacheCalibrator(cache_file_path); } diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index 66706db791..1dca94d9dd 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -15,6 +15,8 @@ #include #include +#include "torch_tensorrt/macros.h" + // Just include the .h? #ifndef DOXYGEN_SHOULD_SKIP_THIS namespace torch { @@ -36,7 +38,6 @@ class IInt8Calibrator; } #endif // DOXYGEN_SHOULD_SKIP_THIS -#include "torch_tensorrt/macros.h" namespace torch_tensorrt { /** * Supported Data Types that can be used with TensorRT engines @@ -45,7 +46,7 @@ namespace torch_tensorrt { * support) so there should not be a reason that you need to use this type * explictly. 
  */
-class TORCHTRT_API DataType {
+class DataType {
  public:
   /**
    * Underlying enum class to support the DataType Class
@@ -85,7 +86,7 @@ class TORCHTRT_API DataType {
    *
    * @param t
    */
-  DataType(c10::ScalarType t);
+  TORCHTRT_API DataType(c10::ScalarType t);
   /**
    * @brief Get the enum value of the DataType object
    *
@@ -137,7 +138,7 @@ class TORCHTRT_API DataType {
   }
 
  private:
-  friend std::ostream& operator<<(std::ostream& os, const DataType& dtype);
+  friend TORCHTRT_API std::ostream& operator<<(std::ostream& os, const DataType& dtype);
   Value value;
 };
 
@@ -265,7 +266,7 @@ enum class EngineCapability : int8_t {
 /**
  * @brief TensorFormat is an enum class which defines the memory layout used to store Tensor Data
  * */
-class TORCHTRT_API TensorFormat {
+class TensorFormat {
  public:
   /**
    * Underlying enum class to support the TensorFormat Class
@@ -299,7 +300,7 @@ class TORCHTRT_API TensorFormat {
    *
    * @param t
    */
-  TensorFormat(at::MemoryFormat t);
+  TORCHTRT_API TensorFormat(at::MemoryFormat t);
   /**
    * @brief Get the enum value of the TensorFormat object
    *
@@ -351,7 +352,7 @@ class TORCHTRT_API TensorFormat {
   }
 
  private:
-  friend std::ostream& operator<<(std::ostream& os, const TensorFormat& format);
+  friend TORCHTRT_API std::ostream& operator<<(std::ostream& os, const TensorFormat& format);
   Value value;
 };
 
@@ -363,7 +364,7 @@ class TORCHTRT_API TensorFormat {
  * signifying a static input shape or a set of three input shapes representing
  * the min, optimal and max input shapes allowed for the engine.
  */
-struct TORCHTRT_API Input {
+struct Input {
   /// Minimum acceptable input size into the engine
   std::vector<int64_t> min_shape;
   /// Optimal input size into the engine (size optimized for given kernels accept any size in min max range)
@@ -387,7 +388,7 @@ struct TORCHTRT_API Input {
    * @param shape Input tensor shape
    * @param format Expected tensor format for the input (Defaults to contiguous)
    */
-  Input(std::vector<int64_t> shape, TensorFormat format = TensorFormat::kContiguous);
+  TORCHTRT_API Input(std::vector<int64_t> shape, TensorFormat format = TensorFormat::kContiguous);
 
   /**
    * @brief Construct a new Input spec object for static input size from
@@ -399,7 +400,7 @@ struct TORCHTRT_API Input {
    * calculation if detectable else Float32)
    * @param format Expected tensor format for the input (Defaults to contiguous)
    */
-  Input(std::vector<int64_t> shape, DataType dtype, TensorFormat format = TensorFormat::kContiguous);
+  TORCHTRT_API Input(std::vector<int64_t> shape, DataType dtype, TensorFormat format = TensorFormat::kContiguous);
 
   /**
    * @brief Construct a new Input spec object for static input size from
@@ -411,7 +412,7 @@ struct TORCHTRT_API Input {
    * @param shape Input tensor shape
    * @param format Expected tensor format for the input (Defaults to contiguous)
    */
-  Input(c10::ArrayRef<int64_t> shape, TensorFormat format = TensorFormat::kContiguous);
+  TORCHTRT_API Input(c10::ArrayRef<int64_t> shape, TensorFormat format = TensorFormat::kContiguous);
 
   /**
    * @brief Construct a new Input spec object for static input size from
@@ -423,7 +424,7 @@ struct TORCHTRT_API Input {
    * calculation if detectable else Float32)
    * @param format Expected tensor format for the input (Defaults to contiguous)
    */
-  Input(c10::ArrayRef<int64_t> shape, DataType dtype, TensorFormat format = TensorFormat::kContiguous);
+  TORCHTRT_API Input(c10::ArrayRef<int64_t> shape, DataType dtype, TensorFormat format = TensorFormat::kContiguous);
 
   /**
    * @brief Construct a new Input spec object dynamic input size from
@@ -436,7 +437,7 @@ struct TORCHTRT_API Input {
    * @param max_shape Maximum acceptable shape for input tensor
    * @param format Expected tensor format for the input (Defaults to contiguous)
    */
-  Input(
+  TORCHTRT_API Input(
       std::vector<int64_t> min_shape,
       std::vector<int64_t> opt_shape,
       std::vector<int64_t> max_shape,
@@ -454,7 +455,7 @@ struct TORCHTRT_API Input {
    * calculation if detectable else Float32)
    * @param format Expected tensor format for the input (Defaults to contiguous)
    */
-  Input(
+  TORCHTRT_API Input(
       std::vector<int64_t> min_shape,
       std::vector<int64_t> opt_shape,
       std::vector<int64_t> max_shape,
@@ -472,7 +473,7 @@ struct TORCHTRT_API Input {
    * @param max_shape Maximum acceptable shape for input tensor
    * @param format Expected tensor format for the input (Defaults to contiguous)
    */
-  Input(
+  TORCHTRT_API Input(
       c10::ArrayRef<int64_t> min_shape,
       c10::ArrayRef<int64_t> opt_shape,
       c10::ArrayRef<int64_t> max_shape,
@@ -490,7 +491,7 @@ struct TORCHTRT_API Input {
    * calculation if detectable else Float32)
    * @param format Expected tensor format for the input (Defaults to contiguous)
    */
-  Input(
+  TORCHTRT_API Input(
       c10::ArrayRef<int64_t> min_shape,
       c10::ArrayRef<int64_t> opt_shape,
       c10::ArrayRef<int64_t> max_shape,
@@ -505,10 +506,10 @@ struct TORCHTRT_API Input {
    *
    * @param tensor Reference tensor to set shape, type and layout
    */
-  Input(at::Tensor tensor);
+  TORCHTRT_API Input(at::Tensor tensor);
 
  private:
-  friend std::ostream& operator<<(std::ostream& os, const Input& input);
+  friend TORCHTRT_API std::ostream& operator<<(std::ostream& os, const Input& input);
 
   bool input_is_dynamic;
 };
 
@@ -541,7 +542,7 @@ namespace torchscript {
  * Settings data structure for Torch-TensorRT TorchScript compilation
  *
  */
-struct TORCHTRT_API CompileSpec {
+struct CompileSpec {
   /**
    * @brief Construct a new Compile Spec object
    * Convenience constructor to set fixed input size from vectors describing
@@ -554,7 +555,7 @@ struct TORCHTRT_API CompileSpec {
    *
    * @param fixed_sizes
    */
-  CompileSpec(std::vector<std::vector<int64_t>> fixed_sizes);
+  TORCHTRT_API CompileSpec(std::vector<std::vector<int64_t>> fixed_sizes);
 
   /**
    * @brief Construct a new Extra Info object
@@ -568,7 +569,7 @@ struct TORCHTRT_API CompileSpec {
    *
    * @param fixed_sizes
    */
-  CompileSpec(std::vector<c10::ArrayRef<int64_t>> fixed_sizes);
+  TORCHTRT_API CompileSpec(std::vector<c10::ArrayRef<int64_t>> fixed_sizes);
 
   /**
    * @brief Construct a new Extra Info object from input ranges.
diff --git a/docsrc/tutorials/installation.rst b/docsrc/tutorials/installation.rst
index 4c4905db96..c0ce261c65 100644
--- a/docsrc/tutorials/installation.rst
+++ b/docsrc/tutorials/installation.rst
@@ -238,6 +238,35 @@ To build using the pre-CXX11 ABI use the ``pre_cxx11_abi`` config
 
     bazel build //:libtorchtrt --config pre_cxx11_abi -c [dbg/opt]
 
+**Building with CMake**
+-----------------------
+
+It is possible to build the API libraries (in cpp/) and the torchtrtc executable using CMake instead of Bazel.
+Currently, the Python API and the tests cannot be built with CMake.
+Begin by installing CMake.
+
+  * Latest releases of CMake and instructions on how to install it are available for different platforms
+    `on their website <https://cmake.org/download/>`_.
+
+A few useful CMake options include:
+
+  * CMake finders for TensorRT and cuDNN are provided in ``cmake/Modules``. In order for CMake to use them, pass
+    ``-DCMAKE_MODULE_PATH=cmake/Modules`` when configuring the project with CMake.
+  * Libtorch provides its own CMake finder.
+    In case CMake doesn't find it, pass the path to your install of libtorch with ``-DTorch_DIR=<path to libtorch>/share/cmake/Torch``.
+  * If TensorRT is not found with the provided CMake finder, specify ``-DTensorRT_ROOT=<path to TensorRT>``.
+  * Finally, configure and build the project in a build directory of your choice with the following command
+    from the root of the Torch-TensorRT project:
+
+  .. code-block:: shell
+
+      cmake -S. -B<build directory> \
+          [-DCMAKE_MODULE_PATH=cmake/Modules] \
+          [-DTorch_DIR=<path to libtorch>/share/cmake/Torch] \
+          [-DTensorRT_ROOT=<path to TensorRT>] \
+          [-DCMAKE_BUILD_TYPE=Debug|Release]
+      cmake --build <build directory>
+
 **Building the Python package**
 --------------------------------
diff --git a/examples/int8/ptq/CMakeLists.txt b/examples/int8/ptq/CMakeLists.txt
new file mode 100644
index 0000000000..943c41b1cb
--- /dev/null
+++ b/examples/int8/ptq/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.17)
+project(ptq LANGUAGES CXX)
+
+# Find dependencies
+find_package(Torch REQUIRED)
+find_package(torchtrt REQUIRED)
+
+set(SRCS
+    ${CMAKE_SOURCE_DIR}/main.cpp
+    ${CMAKE_SOURCE_DIR}/../benchmark/benchmark.cpp
+    ${CMAKE_SOURCE_DIR}/../datasets/cifar10.cpp
+
+)
+
+add_executable(${CMAKE_PROJECT_NAME} ${SRCS})
+target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${CMAKE_SOURCE_DIR}/../../..")
+target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE torch "-Wl,--no-as-needed" torchtrt "-Wl,--as-needed")
diff --git a/examples/int8/qat/CMakeLists.txt b/examples/int8/qat/CMakeLists.txt
new file mode 100644
index 0000000000..11c48736b4
--- /dev/null
+++ b/examples/int8/qat/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.17)
+project(qat LANGUAGES CXX)
+
+# Find dependencies
+find_package(Torch REQUIRED)
+find_package(torchtrt REQUIRED)
+
+set(SRCS
+    ${CMAKE_SOURCE_DIR}/main.cpp
+    ${CMAKE_SOURCE_DIR}/../benchmark/benchmark.cpp
+    ${CMAKE_SOURCE_DIR}/../datasets/cifar10.cpp
+
+)
+
+add_executable(${CMAKE_PROJECT_NAME} ${SRCS})
+target_include_directories(${CMAKE_PROJECT_NAME} PRIVATE "${CMAKE_SOURCE_DIR}/../../..")
+target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE torch "-Wl,--no-as-needed" torchtrt "-Wl,--as-needed")
diff --git a/examples/torchtrt_runtime_example/CMakeLists.txt b/examples/torchtrt_runtime_example/CMakeLists.txt
new file mode 100644
index 0000000000..055c21b51c
--- /dev/null
+++ b/examples/torchtrt_runtime_example/CMakeLists.txt
@@ -0,0 +1,13 @@
+cmake_minimum_required(VERSION 3.17)
+project(torchtrt_runtime_example LANGUAGES CXX)
+
+# Find dependencies
+find_package(Torch REQUIRED)
+find_package(torchtrt REQUIRED)
+
+set(SRCS
+    main.cpp
+)
+
+add_executable(${CMAKE_PROJECT_NAME} ${SRCS})
+target_link_libraries(${CMAKE_PROJECT_NAME} PRIVATE torch "-Wl,--no-as-needed" torchtrt_runtime "-Wl,--as-needed")
diff --git a/third_party/googletest/googletest.cmake b/third_party/googletest/googletest.cmake
new file mode 100644
index 0000000000..d20bee0a0d
--- /dev/null
+++ b/third_party/googletest/googletest.cmake
@@ -0,0 +1,9 @@
+# FetchContent must be loaded before the Declare/MakeAvailable calls below
+include(FetchContent)
+
+FetchContent_Declare(
+  googletest
+  GIT_REPOSITORY https://github.com/google/googletest.git
+  GIT_TAG release-1.11.0
+)
+
+FetchContent_MakeAvailable(googletest)
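
To smoke-test the installed package from outside the source tree, a throwaway consumer project can exercise the exported targets. The sketch below is illustrative only and is not part of this patch: the ``smoke_test`` name and ``main.cpp`` stub are hypothetical placeholders, and the snippet simply mirrors the find/link pattern of the example ``CMakeLists.txt`` files above.

.. code-block:: cmake

    cmake_minimum_required(VERSION 3.17)
    project(smoke_test LANGUAGES CXX)

    # Locate dependencies the same way the in-tree examples do. If torchtrt
    # was installed to a non-standard prefix, point CMake at its package
    # config, typically via -Dtorchtrt_DIR=<prefix>/lib/cmake/torchtrt
    find_package(Torch REQUIRED)
    find_package(torchtrt REQUIRED)

    add_executable(smoke_test main.cpp)

    # -Wl,--no-as-needed keeps libtorchtrt on the link line even when no
    # symbol is referenced directly (useful because the library registers
    # functionality via static initializers); these flags are GNU-ld specific.
    target_link_libraries(smoke_test PRIVATE torch "-Wl,--no-as-needed" torchtrt "-Wl,--as-needed")

If this configures and links, the exported ``torchtrt`` targets and the bundled TensorRT finder are resolving correctly.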